mirror of https://github.com/minio/minio.git
Merge pull request #167 from harshavardhana/pr_out_merge_with_intel_isal_changes_from_github_com_minio_io_isal
This commit is contained in:
commit
033ad56a61
5
Makefile
5
Makefile
|
@ -14,9 +14,6 @@ build-erasure:
|
||||||
@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib
|
@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib
|
||||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure
|
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure
|
||||||
|
|
||||||
build-signify:
|
|
||||||
@$(MAKE) $(MAKE_OPTIONS) -C pkg/signify
|
|
||||||
|
|
||||||
build-cpu:
|
build-cpu:
|
||||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu
|
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu
|
||||||
|
|
||||||
|
@ -53,7 +50,7 @@ build-storage-append:
|
||||||
build-storage-encoded:
|
build-storage-encoded:
|
||||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage
|
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage
|
||||||
|
|
||||||
cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
|
cover: build-erasure build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
|
||||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway
|
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway
|
||||||
|
|
||||||
install: build-erasure
|
install: build-erasure
|
||||||
|
|
|
@ -168,7 +168,7 @@ continue_block:
|
||||||
|
|
||||||
## branch into array
|
## branch into array
|
||||||
lea jump_table(%rip), bufp
|
lea jump_table(%rip), bufp
|
||||||
movzxw (bufp, %rax, 2), len
|
movzwq (bufp, %rax, 2), len
|
||||||
offset=crc_array-jump_table
|
offset=crc_array-jump_table
|
||||||
lea offset(bufp, len, 1), bufp
|
lea offset(bufp, len, 1), bufp
|
||||||
jmp *bufp
|
jmp *bufp
|
||||||
|
@ -194,18 +194,22 @@ full_block:
|
||||||
crc_array:
|
crc_array:
|
||||||
i=128
|
i=128
|
||||||
.rept 128-1
|
.rept 128-1
|
||||||
|
#if !defined(__clang__)
|
||||||
.altmacro
|
.altmacro
|
||||||
LABEL crc_ %i
|
LABEL crc_ %i
|
||||||
.noaltmacro
|
.noaltmacro
|
||||||
|
#endif
|
||||||
crc32q -i*8(block_0), crc_init
|
crc32q -i*8(block_0), crc_init
|
||||||
crc32q -i*8(block_1), crc1
|
crc32q -i*8(block_1), crc1
|
||||||
crc32q -i*8(block_2), crc2
|
crc32q -i*8(block_2), crc2
|
||||||
i=(i-1)
|
i=(i-1)
|
||||||
.endr
|
.endr
|
||||||
|
|
||||||
|
#if !defined(__clang__)
|
||||||
.altmacro
|
.altmacro
|
||||||
LABEL crc_ %i
|
LABEL crc_ %i
|
||||||
.noaltmacro
|
.noaltmacro
|
||||||
|
#endif
|
||||||
crc32q -i*8(block_0), crc_init
|
crc32q -i*8(block_0), crc_init
|
||||||
crc32q -i*8(block_1), crc1
|
crc32q -i*8(block_1), crc1
|
||||||
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
|
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
|
||||||
|
|
|
@ -14,9 +14,18 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#define HAS_SSE _has_sse41
|
||||||
|
#define HAS_AVX _has_avx
|
||||||
|
#define HAS_AVX2 _has_avx2
|
||||||
|
#else
|
||||||
|
#define HAS_SSE has_sse41
|
||||||
|
#define HAS_AVX has_avx
|
||||||
|
#define HAS_AVX2 has_avx2
|
||||||
|
#endif
|
||||||
|
|
||||||
.file "cpufeatures.c"
|
.file "cpufeatures.c"
|
||||||
.text
|
.text
|
||||||
.type cpuid, @function
|
|
||||||
cpuid:
|
cpuid:
|
||||||
.LFB2:
|
.LFB2:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
|
@ -56,10 +65,8 @@ cpuid:
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
.LFE2:
|
.LFE2:
|
||||||
.size cpuid, .-cpuid
|
.globl HAS_SSE
|
||||||
.globl has_sse41
|
HAS_SSE:
|
||||||
.type has_sse41, @function
|
|
||||||
has_sse41:
|
|
||||||
.LFB3:
|
.LFB3:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %rbp
|
pushq %rbp
|
||||||
|
@ -82,10 +89,8 @@ has_sse41:
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
.LFE3:
|
.LFE3:
|
||||||
.size has_sse41, .-has_sse41
|
.globl HAS_AVX
|
||||||
.globl has_avx
|
HAS_AVX:
|
||||||
.type has_avx, @function
|
|
||||||
has_avx:
|
|
||||||
.LFB4:
|
.LFB4:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %rbp
|
pushq %rbp
|
||||||
|
@ -108,10 +113,8 @@ has_avx:
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
.LFE4:
|
.LFE4:
|
||||||
.size has_avx, .-has_avx
|
.globl HAS_AVX2
|
||||||
.globl has_avx2
|
HAS_AVX2:
|
||||||
.type has_avx2, @function
|
|
||||||
has_avx2:
|
|
||||||
.LFB5:
|
.LFB5:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
pushq %rbp
|
pushq %rbp
|
||||||
|
@ -133,7 +136,3 @@ has_avx2:
|
||||||
.cfi_def_cfa 7, 8
|
.cfi_def_cfa 7, 8
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
.LFE5:
|
|
||||||
.size has_avx2, .-has_avx2
|
|
||||||
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
|
|
||||||
.section .note.GNU-stack,"",@progbits
|
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
all: build test
|
all: build test
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
|
|
||||||
|
SYSTEM_NAME := $(shell uname -s)
|
||||||
|
|
||||||
test:
|
test:
|
||||||
@godep go test -race -coverprofile=cover.out
|
@godep go test -race -coverprofile=cover.out
|
||||||
|
|
||||||
isal/isal-l.a:
|
isal/isal-l.a:
|
||||||
|
ifeq ($(SYSTEM_NAME), Darwin)
|
||||||
|
@$(MAKE) -C isal arch=osx lib
|
||||||
|
else
|
||||||
@$(MAKE) -C isal lib
|
@$(MAKE) -C isal lib
|
||||||
|
endif
|
||||||
build: isal/isal-l.a
|
build: isal/isal-l.a
|
||||||
@godep go build
|
@godep go build
|
||||||
|
|
||||||
|
|
|
@ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m,
|
||||||
|
|
||||||
*source = tmp_source;
|
*source = tmp_source;
|
||||||
*target = tmp_target;
|
*target = tmp_target;
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1,3 +1,22 @@
|
||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
*.so
|
*.so
|
||||||
|
*~
|
||||||
|
*.dSYM
|
||||||
|
erasure-code-base-test
|
||||||
|
erasure-code-sse-test
|
||||||
|
erasure-code-test
|
||||||
|
gf-2vect-dot-prod-sse-test
|
||||||
|
gf-3vect-dot-prod-sse-test
|
||||||
|
gf-4vect-dot-prod-sse-test
|
||||||
|
gf-5vect-dot-prod-sse-test
|
||||||
|
gf-6vect-dot-prod-sse-test
|
||||||
|
gf-inverse-test
|
||||||
|
gf-vect-dot-prod-avx-test
|
||||||
|
gf-vect-dot-prod-base-test
|
||||||
|
gf-vect-dot-prod-sse-test
|
||||||
|
gf-vect-dot-prod-test
|
||||||
|
gf-vect-mul-avx-test
|
||||||
|
gf-vect-mul-base-test
|
||||||
|
gf-vect-mul-sse-test
|
||||||
|
gf-vect-mul-test
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
|
|
||||||
units = src
|
units = src
|
||||||
|
|
||||||
default: slib
|
default: lib
|
||||||
|
|
||||||
include $(foreach unit,$(units), $(unit)/Makefile)
|
include $(foreach unit,$(units), $(unit)/Makefile)
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __unix__
|
#if !defined(__unix__) && !defined(__APPLE__)
|
||||||
#ifdef __MINGW32__
|
#ifdef __MINGW32__
|
||||||
# include <_mingw.h>
|
# include <_mingw.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -59,7 +59,7 @@ typedef unsigned char UINT8;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef __unix__
|
#if defined(__unix__) || defined(__APPLE__)
|
||||||
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
|
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
|
||||||
# define __forceinline static inline
|
# define __forceinline static inline
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64
|
||||||
CFLAGS_icl = -Qstd=c99
|
CFLAGS_icl = -Qstd=c99
|
||||||
ARFLAGS_win64 = -out:$@
|
ARFLAGS_win64 = -out:$@
|
||||||
|
|
||||||
|
# arch=osx build options
|
||||||
|
ASFLAGS_osx = -f macho64
|
||||||
|
ARFLAGS_osx = -r $@
|
||||||
|
STRIP_gcc = strip -d $@
|
||||||
|
|
||||||
# arch=mingw build options
|
# arch=mingw build options
|
||||||
ASFLAGS_mingw = -f win64
|
ASFLAGS_mingw = -f win64
|
||||||
ARFLAGS_mingw = cr $@
|
ARFLAGS_mingw = cr $@
|
||||||
|
@ -101,9 +106,13 @@ ifeq ($(arch),win64)
|
||||||
lib_name := $(basename $(lib_name)).lib
|
lib_name := $(basename $(lib_name)).lib
|
||||||
endif
|
endif
|
||||||
lsrcwin64 = $(lsrc)
|
lsrcwin64 = $(lsrc)
|
||||||
|
lsrcosx = $(lsrc)
|
||||||
unit_testswin64 = $(unit_tests)
|
unit_testswin64 = $(unit_tests)
|
||||||
|
unit_testsosx = $(unit_tests)
|
||||||
exampleswin64 = $(examples)
|
exampleswin64 = $(examples)
|
||||||
|
examplesosx = $(examples)
|
||||||
perf_testswin64 = $(perf_tests)
|
perf_testswin64 = $(perf_tests)
|
||||||
|
perf_testsosx = $(perf_tests)
|
||||||
|
|
||||||
# Build and run unit tests, performance tests, etc.
|
# Build and run unit tests, performance tests, etc.
|
||||||
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
|
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
|
||||||
|
@ -199,7 +208,6 @@ perf_report:
|
||||||
@echo Summary:
|
@echo Summary:
|
||||||
-grep runtime $(rpt_name)
|
-grep runtime $(rpt_name)
|
||||||
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo Cleaning up
|
@echo Cleaning up
|
||||||
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)
|
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)
|
||||||
|
|
|
@ -33,6 +33,42 @@
|
||||||
%define WRT_OPT
|
%define WRT_OPT
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
|
||||||
|
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
|
||||||
|
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
|
||||||
|
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
|
||||||
|
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
|
||||||
|
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
|
||||||
|
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
|
||||||
|
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
|
||||||
|
%define GF_VECT_MUL_BASE _gf_vect_mul_base
|
||||||
|
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
|
||||||
|
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
|
||||||
|
|
||||||
|
%define EC_ENCODE_DATA _ec_encode_data
|
||||||
|
%define GF_VECT_MUL _gf_vect_mul
|
||||||
|
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
|
||||||
|
|
||||||
|
%else
|
||||||
|
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
|
||||||
|
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
|
||||||
|
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
|
||||||
|
%define GF_VECT_MUL_SSE gf_vect_mul_sse
|
||||||
|
%define GF_VECT_MUL_AVX gf_vect_mul_avx
|
||||||
|
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
|
||||||
|
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
|
||||||
|
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
|
||||||
|
%define GF_VECT_MUL_BASE gf_vect_mul_base
|
||||||
|
%define EC_ENCODE_DATA_BASE ec_encode_data_base
|
||||||
|
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
|
||||||
|
|
||||||
|
%define EC_ENCODE_DATA ec_encode_data
|
||||||
|
%define GF_VECT_MUL gf_vect_mul
|
||||||
|
%define GF_VECT_DOT_PROD gf_vect_dot_prod
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf32
|
%ifidn __OUTPUT_FORMAT__, elf32
|
||||||
|
|
||||||
[bits 32]
|
[bits 32]
|
||||||
|
@ -51,19 +87,19 @@ default rel
|
||||||
%define wrd_sz qword
|
%define wrd_sz qword
|
||||||
%define arg1 rsi
|
%define arg1 rsi
|
||||||
|
|
||||||
extern ec_encode_data_sse
|
extern EC_ENCODE_DATA_SSE
|
||||||
extern ec_encode_data_avx
|
extern EC_ENCODE_DATA_AVX
|
||||||
extern ec_encode_data_avx2
|
extern EC_ENCODE_DATA_AVX2
|
||||||
extern gf_vect_mul_sse
|
extern GF_VECT_MUL_SSE
|
||||||
extern gf_vect_mul_avx
|
extern GF_VECT_MUL_AVX
|
||||||
extern gf_vect_dot_prod_sse
|
extern GF_VECT_DOT_PROD_SSE
|
||||||
extern gf_vect_dot_prod_avx
|
extern GF_VECT_DOT_PROD_AVX
|
||||||
extern gf_vect_dot_prod_avx2
|
extern GF_VECT_DOT_PROD_AVX2
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
extern gf_vect_mul_base
|
extern GF_VECT_MUL_BASE
|
||||||
extern ec_encode_data_base
|
extern EC_ENCODE_DATA_BASE
|
||||||
extern gf_vect_dot_prod_base
|
extern GF_VECT_DOT_PROD_BASE
|
||||||
|
|
||||||
section .data
|
section .data
|
||||||
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
|
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
|
||||||
|
@ -82,33 +118,33 @@ section .text
|
||||||
;;;;
|
;;;;
|
||||||
; ec_encode_data multibinary function
|
; ec_encode_data multibinary function
|
||||||
;;;;
|
;;;;
|
||||||
global ec_encode_data:function
|
global EC_ENCODE_DATA:function
|
||||||
ec_encode_data_mbinit:
|
ec_encode_data_mbinit:
|
||||||
call ec_encode_data_dispatch_init
|
call ec_encode_data_dispatch_init
|
||||||
|
|
||||||
ec_encode_data:
|
EC_ENCODE_DATA:
|
||||||
jmp wrd_sz [ec_encode_data_dispatched]
|
jmp wrd_sz [ec_encode_data_dispatched]
|
||||||
|
|
||||||
ec_encode_data_dispatch_init:
|
ec_encode_data_dispatch_init:
|
||||||
push arg1
|
push arg1
|
||||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||||
lea arg1, [ec_encode_data_base]
|
lea arg1, [EC_ENCODE_DATA_BASE]
|
||||||
%else
|
%else
|
||||||
push rax
|
push rax
|
||||||
push rbx
|
push rbx
|
||||||
push rcx
|
push rcx
|
||||||
push rdx
|
push rdx
|
||||||
lea arg1, [ec_encode_data_base WRT_OPT] ; Default
|
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
|
||||||
|
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
cpuid
|
cpuid
|
||||||
lea rbx, [ec_encode_data_sse WRT_OPT]
|
lea rbx, [EC_ENCODE_DATA_BASE WRT_OPT]
|
||||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||||
cmovne arg1, rbx
|
cmovne arg1, rbx
|
||||||
|
|
||||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||||
lea rbx, [ec_encode_data_avx WRT_OPT]
|
lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
|
||||||
|
|
||||||
jne _done_ec_encode_data_init
|
jne _done_ec_encode_data_init
|
||||||
mov rsi, rbx
|
mov rsi, rbx
|
||||||
|
@ -118,7 +154,7 @@ ec_encode_data_dispatch_init:
|
||||||
mov eax, 7
|
mov eax, 7
|
||||||
cpuid
|
cpuid
|
||||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||||
lea rbx, [ec_encode_data_avx2 WRT_OPT]
|
lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
|
||||||
cmovne rsi, rbx
|
cmovne rsi, rbx
|
||||||
|
|
||||||
;; Does it have xmm and ymm support
|
;; Does it have xmm and ymm support
|
||||||
|
@ -127,7 +163,7 @@ ec_encode_data_dispatch_init:
|
||||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
je _done_ec_encode_data_init
|
je _done_ec_encode_data_init
|
||||||
lea rsi, [ec_encode_data_sse WRT_OPT]
|
lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
|
||||||
|
|
||||||
_done_ec_encode_data_init:
|
_done_ec_encode_data_init:
|
||||||
pop rdx
|
pop rdx
|
||||||
|
@ -142,30 +178,30 @@ _done_ec_encode_data_init:
|
||||||
;;;;
|
;;;;
|
||||||
; gf_vect_mul multibinary function
|
; gf_vect_mul multibinary function
|
||||||
;;;;
|
;;;;
|
||||||
global gf_vect_mul:function
|
global GF_VECT_MUL:function
|
||||||
gf_vect_mul_mbinit:
|
gf_vect_mul_mbinit:
|
||||||
call gf_vect_mul_dispatch_init
|
call gf_vect_mul_dispatch_init
|
||||||
|
|
||||||
gf_vect_mul:
|
GF_VECT_MUL:
|
||||||
jmp wrd_sz [gf_vect_mul_dispatched]
|
jmp wrd_sz [gf_vect_mul_dispatched]
|
||||||
|
|
||||||
gf_vect_mul_dispatch_init:
|
gf_vect_mul_dispatch_init:
|
||||||
push arg1
|
push arg1
|
||||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||||
lea arg1, [gf_vect_mul_base]
|
lea arg1, [GF_VECT_MUL_BASE]
|
||||||
%else
|
%else
|
||||||
push rax
|
push rax
|
||||||
push rbx
|
push rbx
|
||||||
push rcx
|
push rcx
|
||||||
push rdx
|
push rdx
|
||||||
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
|
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
|
||||||
|
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
cpuid
|
cpuid
|
||||||
test ecx, FLAG_CPUID1_ECX_SSE4_2
|
test ecx, FLAG_CPUID1_ECX_SSE4_2
|
||||||
lea rbx, [gf_vect_mul_sse WRT_OPT]
|
lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
|
||||||
je _done_gf_vect_mul_dispatch_init
|
je _done_gf_vect_mul_dispatch_init
|
||||||
mov arg1, rbx
|
mov arg1, rbx
|
||||||
|
|
||||||
;; Try for AVX
|
;; Try for AVX
|
||||||
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
|
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
|
||||||
|
@ -178,49 +214,49 @@ gf_vect_mul_dispatch_init:
|
||||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
jne _done_gf_vect_mul_dispatch_init
|
jne _done_gf_vect_mul_dispatch_init
|
||||||
lea arg1, [gf_vect_mul_avx WRT_OPT]
|
lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
|
||||||
|
|
||||||
_done_gf_vect_mul_dispatch_init:
|
_done_gf_vect_mul_dispatch_init:
|
||||||
pop rdx
|
pop rdx
|
||||||
pop rcx
|
pop rcx
|
||||||
pop rbx
|
pop rbx
|
||||||
pop rax
|
pop rax
|
||||||
%endif ;; END 32-bit check
|
%endif ;; END 32-bit check
|
||||||
mov [gf_vect_mul_dispatched], arg1
|
mov [gf_vect_mul_dispatched], arg1
|
||||||
pop arg1
|
pop arg1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
;;;;
|
;;;;
|
||||||
; gf_vect_dot_prod multibinary function
|
; gf_vect_dot_prod multibinary function
|
||||||
;;;;
|
;;;;
|
||||||
global gf_vect_dot_prod:function
|
global GF_VECT_DOT_PROD:function
|
||||||
gf_vect_dot_prod_mbinit:
|
gf_vect_dot_prod_mbinit:
|
||||||
call gf_vect_dot_prod_dispatch_init
|
call gf_vect_dot_prod_dispatch_init
|
||||||
|
|
||||||
gf_vect_dot_prod:
|
GF_VECT_DOT_PROD:
|
||||||
jmp wrd_sz [gf_vect_dot_prod_dispatched]
|
jmp wrd_sz [gf_vect_dot_prod_dispatched]
|
||||||
|
|
||||||
gf_vect_dot_prod_dispatch_init:
|
gf_vect_dot_prod_dispatch_init:
|
||||||
push arg1
|
push arg1
|
||||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||||
lea arg1, [gf_vect_dot_prod_base]
|
lea arg1, [GF_VECT_DOT_PROD_BASE]
|
||||||
%else
|
%else
|
||||||
push rax
|
push rax
|
||||||
push rbx
|
push rbx
|
||||||
push rcx
|
push rcx
|
||||||
push rdx
|
push rdx
|
||||||
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
|
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
|
||||||
|
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
cpuid
|
cpuid
|
||||||
lea rbx, [gf_vect_dot_prod_sse WRT_OPT]
|
lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
|
||||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||||
cmovne arg1, rbx
|
cmovne arg1, rbx
|
||||||
|
|
||||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||||
lea rbx, [gf_vect_dot_prod_avx WRT_OPT]
|
lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
|
||||||
|
|
||||||
jne _done_gf_vect_dot_prod_init
|
jne _done_gf_vect_dot_prod_init
|
||||||
mov rsi, rbx
|
mov rsi, rbx
|
||||||
|
@ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init:
|
||||||
mov eax, 7
|
mov eax, 7
|
||||||
cpuid
|
cpuid
|
||||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||||
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT]
|
lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
|
||||||
cmovne rsi, rbx
|
cmovne rsi, rbx
|
||||||
|
|
||||||
;; Does it have xmm and ymm support
|
;; Does it have xmm and ymm support
|
||||||
|
@ -238,8 +274,8 @@ gf_vect_dot_prod_dispatch_init:
|
||||||
xgetbv
|
xgetbv
|
||||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||||
je _done_gf_vect_dot_prod_init
|
je _done_gf_vect_dot_prod_init
|
||||||
lea rsi, [gf_vect_dot_prod_sse WRT_OPT]
|
lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
|
||||||
|
|
||||||
_done_gf_vect_dot_prod_init:
|
_done_gf_vect_dot_prod_init:
|
||||||
pop rdx
|
pop rdx
|
||||||
|
@ -261,6 +297,6 @@ global %1_slver
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion ec_encode_data, 00, 02, 0133
|
slversion EC_ENCODE_DATA, 00, 02, 0133
|
||||||
slversion gf_vect_mul, 00, 02, 0134
|
slversion GF_VECT_MUL, 00, 02, 0134
|
||||||
slversion gf_vect_dot_prod, 00, 01, 0138
|
slversion GF_VECT_DOT_PROD, 00, 01, 0138
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -58,6 +63,31 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -143,9 +173,8 @@ section .text
|
||||||
%define xp2 xmm3
|
%define xp2 xmm3
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_2vect_dot_prod_avx:function
|
global GF_2VECT_DOT_PROD_AVX:function
|
||||||
|
func(GF_2VECT_DOT_PROD_AVX)
|
||||||
func(gf_2vect_dot_prod_avx)
|
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -231,4 +260,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_2vect_dot_prod_avx, 02, 03, 0191
|
slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -60,6 +65,33 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -149,9 +181,8 @@ section .text
|
||||||
%define xp2 ymm3
|
%define xp2 ymm3
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_2vect_dot_prod_avx2:function
|
global GF_2VECT_DOT_PROD_AVX2:function
|
||||||
|
func(GF_2VECT_DOT_PROD_AVX2)
|
||||||
func(gf_2vect_dot_prod_avx2)
|
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 32
|
sub len, 32
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -243,4 +274,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
|
slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -58,6 +63,31 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -144,9 +174,8 @@ section .text
|
||||||
%define xp2 xmm3
|
%define xp2 xmm3
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_2vect_dot_prod_sse:function
|
global GF_2VECT_DOT_PROD_SSE:function
|
||||||
|
func(GF_2VECT_DOT_PROD_SSE)
|
||||||
func(gf_2vect_dot_prod_sse)
|
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -233,4 +262,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_2vect_dot_prod_sse, 00, 02, 0062
|
slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -60,6 +65,33 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -157,8 +189,8 @@ section .text
|
||||||
%define xp3 xmm4
|
%define xp3 xmm4
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_3vect_dot_prod_avx:function
|
global GF_3VECT_DOT_PROD_AVX:function
|
||||||
func(gf_3vect_dot_prod_avx)
|
func(GF_3VECT_DOT_PROD_AVX)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -255,4 +287,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_3vect_dot_prod_avx, 02, 03, 0192
|
slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -62,6 +67,35 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -162,8 +196,8 @@ section .text
|
||||||
%define xp3 ymm4
|
%define xp3 ymm4
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_3vect_dot_prod_avx2:function
|
global GF_3VECT_DOT_PROD_AVX2:function
|
||||||
func(gf_3vect_dot_prod_avx2)
|
func(GF_3VECT_DOT_PROD_AVX2)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 32
|
sub len, 32
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -268,4 +302,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197
|
slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -60,6 +65,33 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -157,8 +189,8 @@ section .text
|
||||||
%define xp3 xmm4
|
%define xp3 xmm4
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_3vect_dot_prod_sse:function
|
global GF_3VECT_DOT_PROD_SSE:function
|
||||||
func(gf_3vect_dot_prod_sse)
|
func(GF_3VECT_DOT_PROD_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -256,4 +288,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
|
slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -182,8 +220,8 @@ section .text
|
||||||
%define xp4 xmm5
|
%define xp4 xmm5
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_4vect_dot_prod_avx:function
|
global GF_4VECT_DOT_PROD_AVX:function
|
||||||
func(gf_4vect_dot_prod_avx)
|
func(GF_4VECT_DOT_PROD_AVX)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -293,4 +331,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
|
slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -68,6 +73,41 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -187,8 +227,8 @@ section .text
|
||||||
%define xp4 ymm5
|
%define xp4 ymm5
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_4vect_dot_prod_avx2:function
|
global GF_4VECT_DOT_PROD_AVX2:function
|
||||||
func(gf_4vect_dot_prod_avx2)
|
func(GF_4VECT_DOT_PROD_AVX2)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 32
|
sub len, 32
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -302,4 +342,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
|
slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -182,8 +220,8 @@ section .text
|
||||||
%define xp4 xmm5
|
%define xp4 xmm5
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_4vect_dot_prod_sse:function
|
global GF_4VECT_DOT_PROD_SSE:function
|
||||||
func(gf_4vect_dot_prod_sse)
|
func(GF_4VECT_DOT_PROD_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -293,4 +331,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
|
slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -68,6 +73,41 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -189,8 +229,8 @@ section .text
|
||||||
%define xp5 ymm6
|
%define xp5 ymm6
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_5vect_dot_prod_avx2:function
|
global GF_5VECT_DOT_PROD_AVX2:function
|
||||||
func(gf_5vect_dot_prod_avx2)
|
func(GF_5VECT_DOT_PROD_AVX2)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 32
|
sub len, 32
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -320,4 +360,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
|
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -184,8 +222,8 @@ section .text
|
||||||
%define xp5 xmm6
|
%define xp5 xmm6
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_5vect_dot_prod_sse:function
|
global GF_5VECT_DOT_PROD_SSE:function
|
||||||
func(gf_5vect_dot_prod_sse)
|
func(GF_5VECT_DOT_PROD_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -309,4 +347,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
|
slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -182,8 +220,8 @@ section .text
|
||||||
%define xp6 xmm7
|
%define xp6 xmm7
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_6vect_dot_prod_avx:function
|
global GF_6VECT_DOT_PROD_AVX:function
|
||||||
func(gf_6vect_dot_prod_avx)
|
func(GF_6VECT_DOT_PROD_AVX)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -320,4 +358,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
|
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -68,6 +73,41 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -66,6 +71,39 @@
|
||||||
%endmacro
|
%endmacro
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -182,8 +220,8 @@ section .text
|
||||||
%define xp6 xmm7
|
%define xp6 xmm7
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_6vect_dot_prod_sse:function
|
global GF_6VECT_DOT_PROD_SSE:function
|
||||||
func(gf_6vect_dot_prod_sse)
|
func(GF_6VECT_DOT_PROD_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -320,4 +358,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_6vect_dot_prod_sse, 00, 03, 0066
|
slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
|
||||||
|
%else
|
||||||
|
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -51,6 +56,24 @@
|
||||||
%define FUNC_RESTORE
|
%define FUNC_RESTORE
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -121,8 +144,8 @@ section .text
|
||||||
%define xp xmm2
|
%define xp xmm2
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_vect_dot_prod_avx:function
|
global GF_VECT_DOT_PROD_AVX:function
|
||||||
func(gf_vect_dot_prod_avx)
|
func(GF_VECT_DOT_PROD_AVX)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -195,4 +218,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_vect_dot_prod_avx, 02, 03, 0061
|
slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
|
||||||
|
%else
|
||||||
|
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -53,6 +58,26 @@
|
||||||
%define FUNC_RESTORE
|
%define FUNC_RESTORE
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -126,8 +151,8 @@ section .text
|
||||||
%define xp ymm2
|
%define xp ymm2
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_vect_dot_prod_avx2:function
|
global GF_VECT_DOT_PROD_AVX2:function
|
||||||
func(gf_vect_dot_prod_avx2)
|
func(GF_VECT_DOT_PROD_AVX2)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 32
|
sub len, 32
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -200,4 +225,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
|
slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
|
||||||
|
%else
|
||||||
|
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -50,6 +55,23 @@
|
||||||
%define FUNC_RESTORE
|
%define FUNC_RESTORE
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -121,8 +143,8 @@ section .text
|
||||||
%define xp xmm2
|
%define xp xmm2
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_vect_dot_prod_sse:function
|
global GF_VECT_DOT_PROD_SSE:function
|
||||||
func(gf_vect_dot_prod_sse)
|
func(GF_VECT_DOT_PROD_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
sub len, 16
|
sub len, 16
|
||||||
jl .return_fail
|
jl .return_fail
|
||||||
|
@ -192,4 +214,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_vect_dot_prod_sse, 00, 03, 0060
|
slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
|
||||||
|
%else
|
||||||
|
%define GF_VECT_MUL_AVX gf_vect_mul_avx
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -46,6 +51,19 @@
|
||||||
%define FUNC_SAVE
|
%define FUNC_SAVE
|
||||||
%define FUNC_RESTORE
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
|
%elifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
%define tmp r11
|
||||||
|
%define return rax
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
%elifidn __OUTPUT_FORMAT__, win64
|
%elifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -111,8 +129,8 @@ section .text
|
||||||
%define xtmp2c xmm7
|
%define xtmp2c xmm7
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_vect_mul_avx:function
|
global GF_VECT_MUL_AVX:function
|
||||||
func(gf_vect_mul_avx)
|
func(GF_VECT_MUL_AVX)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
mov pos, 0
|
mov pos, 0
|
||||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
@ -169,4 +187,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_vect_mul_avx, 01, 02, 0036
|
slversion GF_VECT_MUL_AVX, 01, 02, 0036
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
;
|
;
|
||||||
; Redistribution and use in source and binary forms, with or without
|
; Redistribution and use in source and binary forms, with or without
|
||||||
; modification, are permitted provided that the following conditions
|
; modification, are permitted provided that the following conditions
|
||||||
; are met:
|
; are met:
|
||||||
; * Redistributions of source code must retain the above copyright
|
; * Redistributions of source code must retain the above copyright
|
||||||
; notice, this list of conditions and the following disclaimer.
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
@ -32,6 +32,11 @@
|
||||||
;;;
|
;;;
|
||||||
;;; Author: Gregory Tucker
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
|
||||||
|
%else
|
||||||
|
%define GF_VECT_MUL_SSE gf_vect_mul_sse
|
||||||
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, elf64
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
%define arg0 rdi
|
%define arg0 rdi
|
||||||
|
@ -46,6 +51,19 @@
|
||||||
%define FUNC_SAVE
|
%define FUNC_SAVE
|
||||||
%define FUNC_RESTORE
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
|
%elifidn __OUTPUT_FORMAT__, macho64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
%define tmp r11
|
||||||
|
%define return rax
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
%elifidn __OUTPUT_FORMAT__, win64
|
%elifidn __OUTPUT_FORMAT__, win64
|
||||||
%define arg0 rcx
|
%define arg0 rcx
|
||||||
%define arg1 rdx
|
%define arg1 rdx
|
||||||
|
@ -112,8 +130,8 @@ section .text
|
||||||
|
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
global gf_vect_mul_sse:function
|
global GF_VECT_MUL_SSE:function
|
||||||
func(gf_vect_mul_sse)
|
func(GF_VECT_MUL_SSE)
|
||||||
FUNC_SAVE
|
FUNC_SAVE
|
||||||
mov pos, 0
|
mov pos, 0
|
||||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
@ -175,4 +193,4 @@ global %1_slver
|
||||||
db 0x%3, 0x%2
|
db 0x%3, 0x%2
|
||||||
%endmacro
|
%endmacro
|
||||||
;;; func core, ver, snum
|
;;; func core, ver, snum
|
||||||
slversion gf_vect_mul_sse, 00, 02, 0034
|
slversion GF_VECT_MUL_SSE, 00, 02, 0034
|
||||||
|
|
|
@ -23,7 +23,7 @@ func (s *MySuite) TestPiping(c *C) {
|
||||||
// Run the command on each directory
|
// Run the command on each directory
|
||||||
for _, dir := range dirs {
|
for _, dir := range dirs {
|
||||||
// find $DIR -type f # Find all files
|
// find $DIR -type f # Find all files
|
||||||
ls := exec.Command("ls", dir, "-l")
|
ls := exec.Command("ls", "-l", dir)
|
||||||
|
|
||||||
// | sort -t. -k2 # Sort by file extension
|
// | sort -t. -k2 # Sort by file extension
|
||||||
sort := exec.Command("sort", "-t.", "-k2")
|
sort := exec.Command("sort", "-t.", "-k2")
|
||||||
|
|
Loading…
Reference in New Issue