Merge pull request #167 from harshavardhana/pr_out_merge_with_intel_isal_changes_from_github_com_minio_io_isal

This commit is contained in:
Harshavardhana 2015-01-11 00:43:46 -08:00
commit 033ad56a61
31 changed files with 864 additions and 153 deletions

View File

@ -14,9 +14,6 @@ build-erasure:
@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib @$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure
build-signify:
@$(MAKE) $(MAKE_OPTIONS) -C pkg/signify
build-cpu: build-cpu:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu
@ -53,7 +50,7 @@ build-storage-append:
build-storage-encoded: build-storage-encoded:
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage
cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512 cover: build-erasure build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway @godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway
install: build-erasure install: build-erasure

View File

@ -168,7 +168,7 @@ continue_block:
## branch into array ## branch into array
lea jump_table(%rip), bufp lea jump_table(%rip), bufp
movzxw (bufp, %rax, 2), len movzwq (bufp, %rax, 2), len
offset=crc_array-jump_table offset=crc_array-jump_table
lea offset(bufp, len, 1), bufp lea offset(bufp, len, 1), bufp
jmp *bufp jmp *bufp
@ -194,18 +194,22 @@ full_block:
crc_array: crc_array:
i=128 i=128
.rept 128-1 .rept 128-1
#if !defined(__clang__)
.altmacro .altmacro
LABEL crc_ %i LABEL crc_ %i
.noaltmacro .noaltmacro
#endif
crc32q -i*8(block_0), crc_init crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1 crc32q -i*8(block_1), crc1
crc32q -i*8(block_2), crc2 crc32q -i*8(block_2), crc2
i=(i-1) i=(i-1)
.endr .endr
#if !defined(__clang__)
.altmacro .altmacro
LABEL crc_ %i LABEL crc_ %i
.noaltmacro .noaltmacro
#endif
crc32q -i*8(block_0), crc_init crc32q -i*8(block_0), crc_init
crc32q -i*8(block_1), crc1 crc32q -i*8(block_1), crc1
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet

View File

@ -14,9 +14,18 @@
* limitations under the License. * limitations under the License.
*/ */
/*
 * Names under which the CPU feature probes in this file are exported.
 * When __APPLE__ is defined each name carries a leading underscore;
 * otherwise the plain name is used. Note that HAS_SSE maps to the
 * *sse41* probe (has_sse41), not a generic SSE check.
 * NOTE(review): the underscore presumably matches the prefix the Darwin
 * toolchain applies to C symbols, so C callers still link - confirm.
 */
#ifdef __APPLE__
#define HAS_SSE _has_sse41
#define HAS_AVX _has_avx
#define HAS_AVX2 _has_avx2
#else
#define HAS_SSE has_sse41
#define HAS_AVX has_avx
#define HAS_AVX2 has_avx2
#endif
.file "cpufeatures.c" .file "cpufeatures.c"
.text .text
.type cpuid, @function
cpuid: cpuid:
.LFB2: .LFB2:
.cfi_startproc .cfi_startproc
@ -56,10 +65,8 @@ cpuid:
ret ret
.cfi_endproc .cfi_endproc
.LFE2: .LFE2:
.size cpuid, .-cpuid .globl HAS_SSE
.globl has_sse41 HAS_SSE:
.type has_sse41, @function
has_sse41:
.LFB3: .LFB3:
.cfi_startproc .cfi_startproc
pushq %rbp pushq %rbp
@ -82,10 +89,8 @@ has_sse41:
ret ret
.cfi_endproc .cfi_endproc
.LFE3: .LFE3:
.size has_sse41, .-has_sse41 .globl HAS_AVX
.globl has_avx HAS_AVX:
.type has_avx, @function
has_avx:
.LFB4: .LFB4:
.cfi_startproc .cfi_startproc
pushq %rbp pushq %rbp
@ -108,10 +113,8 @@ has_avx:
ret ret
.cfi_endproc .cfi_endproc
.LFE4: .LFE4:
.size has_avx, .-has_avx .globl HAS_AVX2
.globl has_avx2 HAS_AVX2:
.type has_avx2, @function
has_avx2:
.LFB5: .LFB5:
.cfi_startproc .cfi_startproc
pushq %rbp pushq %rbp
@ -133,7 +136,3 @@ has_avx2:
.cfi_def_cfa 7, 8 .cfi_def_cfa 7, 8
ret ret
.cfi_endproc .cfi_endproc
.LFE5:
.size has_avx2, .-has_avx2
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",@progbits

View File

@ -1,12 +1,17 @@
all: build test all: build test
.PHONY: all .PHONY: all
SYSTEM_NAME := $(shell uname -s)
test: test:
@godep go test -race -coverprofile=cover.out @godep go test -race -coverprofile=cover.out
isal/isal-l.a: isal/isal-l.a:
ifeq ($(SYSTEM_NAME), Darwin)
@$(MAKE) -C isal arch=osx lib
else
@$(MAKE) -C isal lib @$(MAKE) -C isal lib
endif
build: isal/isal-l.a build: isal/isal-l.a
@godep go build @godep go build

View File

@ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m,
*source = tmp_source; *source = tmp_source;
*target = tmp_target; *target = tmp_target;
return 0;
} }
/* /*

View File

@ -1,3 +1,22 @@
*.o *.o
*.a *.a
*.so *.so
*~
*.dSYM
erasure-code-base-test
erasure-code-sse-test
erasure-code-test
gf-2vect-dot-prod-sse-test
gf-3vect-dot-prod-sse-test
gf-4vect-dot-prod-sse-test
gf-5vect-dot-prod-sse-test
gf-6vect-dot-prod-sse-test
gf-inverse-test
gf-vect-dot-prod-avx-test
gf-vect-dot-prod-base-test
gf-vect-dot-prod-sse-test
gf-vect-dot-prod-test
gf-vect-mul-avx-test
gf-vect-mul-base-test
gf-vect-mul-sse-test
gf-vect-mul-test

View File

@ -30,7 +30,7 @@
units = src units = src
default: slib default: lib
include $(foreach unit,$(units), $(unit)/Makefile) include $(foreach unit,$(units), $(unit)/Makefile)

View File

@ -41,7 +41,7 @@
extern "C" { extern "C" {
#endif #endif
#ifndef __unix__ #if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__ #ifdef __MINGW32__
# include <_mingw.h> # include <_mingw.h>
#endif #endif
@ -59,7 +59,7 @@ typedef unsigned char UINT8;
#endif #endif
#ifdef __unix__ #if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) # define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline # define __forceinline static inline
#else #else

View File

@ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64
CFLAGS_icl = -Qstd=c99 CFLAGS_icl = -Qstd=c99
ARFLAGS_win64 = -out:$@ ARFLAGS_win64 = -out:$@
# arch=osx build options
# ASFLAGS_osx: assembler flags selecting the macho64 (Mach-O 64-bit) output format.
# ARFLAGS_osx: archiver flags; "$@" expands to the library being built.
# NOTE(review): STRIP_gcc is keyed by compiler name, not by arch, so unlike the
# two *_osx variables above it is presumably not limited to arch=osx builds.
# Confirm it does not unintentionally replace an earlier STRIP_gcc definition.
ASFLAGS_osx = -f macho64
ARFLAGS_osx = -r $@
STRIP_gcc = strip -d $@
# arch=mingw build options # arch=mingw build options
ASFLAGS_mingw = -f win64 ASFLAGS_mingw = -f win64
ARFLAGS_mingw = cr $@ ARFLAGS_mingw = cr $@
@ -101,9 +106,13 @@ ifeq ($(arch),win64)
lib_name := $(basename $(lib_name)).lib lib_name := $(basename $(lib_name)).lib
endif endif
lsrcwin64 = $(lsrc) lsrcwin64 = $(lsrc)
lsrcosx = $(lsrc)
unit_testswin64 = $(unit_tests) unit_testswin64 = $(unit_tests)
unit_testsosx = $(unit_tests)
exampleswin64 = $(examples) exampleswin64 = $(examples)
examplesosx = $(examples)
perf_testswin64 = $(perf_tests) perf_testswin64 = $(perf_tests)
perf_testsosx = $(perf_tests)
# Build and run unit tests, performance tests, etc. # Build and run unit tests, performance tests, etc.
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests)) all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
@ -199,7 +208,6 @@ perf_report:
@echo Summary: @echo Summary:
-grep runtime $(rpt_name) -grep runtime $(rpt_name)
clean: clean:
@echo Cleaning up @echo Cleaning up
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name) @$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)

View File

@ -33,6 +33,42 @@
%define WRT_OPT %define WRT_OPT
%endif %endif
;;; Symbol-name indirection for the multibinary dispatchers in this file.
;;; When the output format is macho64 every imported/exported routine name is
;;; defined with a leading underscore; for every other output format the plain
;;; name is used. The extern/global directives and the dispatch code refer to
;;; the upper-case macros only.
;;; NOTE(review): the underscore presumably matches Mach-O C symbol naming - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE _gf_vect_mul_base
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
%define EC_ENCODE_DATA _ec_encode_data
%define GF_VECT_MUL _gf_vect_mul
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
%else
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE gf_vect_mul_base
%define EC_ENCODE_DATA_BASE ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
%define EC_ENCODE_DATA ec_encode_data
%define GF_VECT_MUL gf_vect_mul
%define GF_VECT_DOT_PROD gf_vect_dot_prod
%endif
%ifidn __OUTPUT_FORMAT__, elf32 %ifidn __OUTPUT_FORMAT__, elf32
[bits 32] [bits 32]
@ -51,19 +87,19 @@ default rel
%define wrd_sz qword %define wrd_sz qword
%define arg1 rsi %define arg1 rsi
extern ec_encode_data_sse extern EC_ENCODE_DATA_SSE
extern ec_encode_data_avx extern EC_ENCODE_DATA_AVX
extern ec_encode_data_avx2 extern EC_ENCODE_DATA_AVX2
extern gf_vect_mul_sse extern GF_VECT_MUL_SSE
extern gf_vect_mul_avx extern GF_VECT_MUL_AVX
extern gf_vect_dot_prod_sse extern GF_VECT_DOT_PROD_SSE
extern gf_vect_dot_prod_avx extern GF_VECT_DOT_PROD_AVX
extern gf_vect_dot_prod_avx2 extern GF_VECT_DOT_PROD_AVX2
%endif %endif
extern gf_vect_mul_base extern GF_VECT_MUL_BASE
extern ec_encode_data_base extern EC_ENCODE_DATA_BASE
extern gf_vect_dot_prod_base extern GF_VECT_DOT_PROD_BASE
section .data section .data
;;; *_mbinit are initial values for *_dispatched; is updated on first call. ;;; *_mbinit are initial values for *_dispatched; is updated on first call.
@ -82,33 +118,33 @@ section .text
;;;; ;;;;
; ec_encode_data multibinary function ; ec_encode_data multibinary function
;;;; ;;;;
global ec_encode_data:function global EC_ENCODE_DATA:function
ec_encode_data_mbinit: ec_encode_data_mbinit:
call ec_encode_data_dispatch_init call ec_encode_data_dispatch_init
ec_encode_data: EC_ENCODE_DATA:
jmp wrd_sz [ec_encode_data_dispatched] jmp wrd_sz [ec_encode_data_dispatched]
ec_encode_data_dispatch_init: ec_encode_data_dispatch_init:
push arg1 push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check %ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [ec_encode_data_base] lea arg1, [EC_ENCODE_DATA_BASE]
%else %else
push rax push rax
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [ec_encode_data_base WRT_OPT] ; Default lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
;; SSE4.1 candidate for the cmovne below. It must be the SSE routine: arg1 already holds the BASE default, so loading BASE here would make the cmovne a no-op and SSE4.1-only CPUs would never leave the base implementation.
lea rbx, [ec_encode_data_sse WRT_OPT] lea rbx, [EC_ENCODE_DATA_SSE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [ec_encode_data_avx WRT_OPT] lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
jne _done_ec_encode_data_init jne _done_ec_encode_data_init
mov rsi, rbx mov rsi, rbx
@ -118,7 +154,7 @@ ec_encode_data_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [ec_encode_data_avx2 WRT_OPT] lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
cmovne rsi, rbx cmovne rsi, rbx
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
@ -127,7 +163,7 @@ ec_encode_data_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_init je _done_ec_encode_data_init
lea rsi, [ec_encode_data_sse WRT_OPT] lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
_done_ec_encode_data_init: _done_ec_encode_data_init:
pop rdx pop rdx
@ -142,28 +178,28 @@ _done_ec_encode_data_init:
;;;; ;;;;
; gf_vect_mul multibinary function ; gf_vect_mul multibinary function
;;;; ;;;;
global gf_vect_mul:function global GF_VECT_MUL:function
gf_vect_mul_mbinit: gf_vect_mul_mbinit:
call gf_vect_mul_dispatch_init call gf_vect_mul_dispatch_init
gf_vect_mul: GF_VECT_MUL:
jmp wrd_sz [gf_vect_mul_dispatched] jmp wrd_sz [gf_vect_mul_dispatched]
gf_vect_mul_dispatch_init: gf_vect_mul_dispatch_init:
push arg1 push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check %ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_mul_base] lea arg1, [GF_VECT_MUL_BASE]
%else %else
push rax push rax
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_2 test ecx, FLAG_CPUID1_ECX_SSE4_2
lea rbx, [gf_vect_mul_sse WRT_OPT] lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
je _done_gf_vect_mul_dispatch_init je _done_gf_vect_mul_dispatch_init
mov arg1, rbx mov arg1, rbx
@ -178,7 +214,7 @@ gf_vect_mul_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _done_gf_vect_mul_dispatch_init jne _done_gf_vect_mul_dispatch_init
lea arg1, [gf_vect_mul_avx WRT_OPT] lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
_done_gf_vect_mul_dispatch_init: _done_gf_vect_mul_dispatch_init:
pop rdx pop rdx
@ -194,33 +230,33 @@ _done_gf_vect_mul_dispatch_init:
;;;; ;;;;
; gf_vect_dot_prod multibinary function ; gf_vect_dot_prod multibinary function
;;;; ;;;;
global gf_vect_dot_prod:function global GF_VECT_DOT_PROD:function
gf_vect_dot_prod_mbinit: gf_vect_dot_prod_mbinit:
call gf_vect_dot_prod_dispatch_init call gf_vect_dot_prod_dispatch_init
gf_vect_dot_prod: GF_VECT_DOT_PROD:
jmp wrd_sz [gf_vect_dot_prod_dispatched] jmp wrd_sz [gf_vect_dot_prod_dispatched]
gf_vect_dot_prod_dispatch_init: gf_vect_dot_prod_dispatch_init:
push arg1 push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check %ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_dot_prod_base] lea arg1, [GF_VECT_DOT_PROD_BASE]
%else %else
push rax push rax
push rbx push rbx
push rcx push rcx
push rdx push rdx
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
mov eax, 1 mov eax, 1
cpuid cpuid
lea rbx, [gf_vect_dot_prod_sse WRT_OPT] lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1 test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [gf_vect_dot_prod_avx WRT_OPT] lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
jne _done_gf_vect_dot_prod_init jne _done_gf_vect_dot_prod_init
mov rsi, rbx mov rsi, rbx
@ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init:
mov eax, 7 mov eax, 7
cpuid cpuid
test ebx, FLAG_CPUID1_EBX_AVX2 test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT] lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
cmovne rsi, rbx cmovne rsi, rbx
;; Does it have xmm and ymm support ;; Does it have xmm and ymm support
@ -239,7 +275,7 @@ gf_vect_dot_prod_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_dot_prod_init je _done_gf_vect_dot_prod_init
lea rsi, [gf_vect_dot_prod_sse WRT_OPT] lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
_done_gf_vect_dot_prod_init: _done_gf_vect_dot_prod_init:
pop rdx pop rdx
@ -261,6 +297,6 @@ global %1_slver
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion ec_encode_data, 00, 02, 0133 slversion EC_ENCODE_DATA, 00, 02, 0133
slversion gf_vect_mul, 00, 02, 0134 slversion GF_VECT_MUL, 00, 02, 0134
slversion gf_vect_dot_prod, 00, 01, 0138 slversion GF_VECT_DOT_PROD, 00, 01, 0138

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
%else
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -58,6 +63,31 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9. Note that tmp3 is also r9,
;;; i.e. it shares a register with arg5.
;;; FUNC_SAVE/FUNC_RESTORE push/pop r12 (tmp4), the only register this block
;;; marks as needing to be saved and restored.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -143,9 +173,8 @@ section .text
%define xp2 xmm3 %define xp2 xmm3
align 16 align 16
global gf_2vect_dot_prod_avx:function global GF_2VECT_DOT_PROD_AVX:function
func(GF_2VECT_DOT_PROD_AVX)
func(gf_2vect_dot_prod_avx)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -231,4 +260,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx, 02, 03, 0191 slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
%else
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -60,6 +65,33 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9. Note that tmp3 is also r9,
;;; i.e. it shares a register with arg5.
;;; tmp.w and tmp.b are the 32-bit (r11d) and 8-bit (r11b) views of tmp (r11).
;;; FUNC_SAVE/FUNC_RESTORE push/pop r12 (tmp4), the only register this block
;;; marks as needing to be saved and restored.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -149,9 +181,8 @@ section .text
%define xp2 ymm3 %define xp2 ymm3
align 16 align 16
global gf_2vect_dot_prod_avx2:function global GF_2VECT_DOT_PROD_AVX2:function
func(GF_2VECT_DOT_PROD_AVX2)
func(gf_2vect_dot_prod_avx2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -243,4 +274,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196 slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
%else
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -58,6 +63,31 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9. Note that tmp3 is also r9,
;;; i.e. it shares a register with arg5.
;;; FUNC_SAVE/FUNC_RESTORE push/pop r12 (tmp4), the only register this block
;;; marks as needing to be saved and restored.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -144,9 +174,8 @@ section .text
%define xp2 xmm3 %define xp2 xmm3
align 16 align 16
global gf_2vect_dot_prod_sse:function global GF_2VECT_DOT_PROD_SSE:function
func(GF_2VECT_DOT_PROD_SSE)
func(gf_2vect_dot_prod_sse)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -233,4 +262,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_2vect_dot_prod_sse, 00, 02, 0062 slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
%else
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -60,6 +65,33 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10,
;;; r13 and r12.
;;; FUNC_SAVE pushes r12 then r13 (tmp4, tmp3); FUNC_RESTORE pops them in the
;;; reverse order so the stack unwinds correctly.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -157,8 +189,8 @@ section .text
%define xp3 xmm4 %define xp3 xmm4
align 16 align 16
global gf_3vect_dot_prod_avx:function global GF_3VECT_DOT_PROD_AVX:function
func(gf_3vect_dot_prod_avx) func(GF_3VECT_DOT_PROD_AVX)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -255,4 +287,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx, 02, 03, 0192 slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
%else
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -62,6 +67,35 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10,
;;; r13 and r12.
;;; tmp.w and tmp.b are the 32-bit (r11d) and 8-bit (r11b) views of tmp (r11).
;;; FUNC_SAVE pushes r12 then r13 (tmp4, tmp3); FUNC_RESTORE pops them in the
;;; reverse order so the stack unwinds correctly.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -162,8 +196,8 @@ section .text
%define xp3 ymm4 %define xp3 ymm4
align 16 align 16
global gf_3vect_dot_prod_avx2:function global GF_3VECT_DOT_PROD_AVX2:function
func(gf_3vect_dot_prod_avx2) func(GF_3VECT_DOT_PROD_AVX2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -268,4 +302,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197 slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
%else
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -60,6 +65,33 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10,
;;; r13 and r12.
;;; FUNC_SAVE pushes r12 then r13 (tmp4, tmp3); FUNC_RESTORE pops them in the
;;; reverse order so the stack unwinds correctly.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -157,8 +189,8 @@ section .text
%define xp3 xmm4 %define xp3 xmm4
align 16 align 16
global gf_3vect_dot_prod_sse:function global GF_3VECT_DOT_PROD_SSE:function
func(gf_3vect_dot_prod_sse) func(GF_3VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -256,4 +288,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_3vect_dot_prod_sse, 00, 03, 0063 slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
%else
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10
;;; and r12..r15.
;;; FUNC_SAVE pushes r12, r13, r14, r15 (the registers marked "must be saved
;;; and restored"); FUNC_RESTORE pops them in the reverse order.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp4 xmm5 %define xp4 xmm5
align 16 align 16
global gf_4vect_dot_prod_avx:function global GF_4VECT_DOT_PROD_AVX:function
func(gf_4vect_dot_prod_avx) func(GF_4VECT_DOT_PROD_AVX)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx, 00, 02, 0064 slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
%else
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -68,6 +73,41 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10
;;; and r12..r15.
;;; tmp.w and tmp.b are the 32-bit (r11d) and 8-bit (r11b) views of tmp (r11).
;;; FUNC_SAVE pushes r12, r13, r14, r15 (the registers marked "must be saved
;;; and restored"); FUNC_RESTORE pops them in the reverse order.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -187,8 +227,8 @@ section .text
%define xp4 ymm5 %define xp4 ymm5
align 16 align 16
global gf_4vect_dot_prod_avx2:function global GF_4VECT_DOT_PROD_AVX2:function
func(gf_4vect_dot_prod_avx2) func(GF_4VECT_DOT_PROD_AVX2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -302,4 +342,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064 slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
%else
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10
;;; and r12..r15.
;;; FUNC_SAVE pushes r12, r13, r14, r15 (the registers marked "must be saved
;;; and restored"); FUNC_RESTORE pops them in the reverse order.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp4 xmm5 %define xp4 xmm5
align 16 align 16
global gf_4vect_dot_prod_sse:function global GF_4VECT_DOT_PROD_SSE:function
func(gf_4vect_dot_prod_sse) func(GF_4VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_4vect_dot_prod_sse, 00, 03, 0064 slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Routine name selected by output format: underscore-prefixed for macho64,
;;; plain otherwise.
;;; NOTE(review): unlike the sibling *vect_dot_prod files, no global/func/
;;; slversion line referencing GF_5VECT_DOT_PROD_AVX is visible in this change.
;;; Confirm the macro is actually used, otherwise the macho64 build still
;;; exports the un-prefixed name.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
%else
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10
;;; and r12..r15.
;;; FUNC_SAVE pushes r12, r13, r14, r15 (the registers marked "must be saved
;;; and restored"); FUNC_RESTORE pops them in the reverse order.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
;;; Name used by the global/func/slversion lines of this routine:
;;; underscore-prefixed when the output format is macho64, plain otherwise.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
%else
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -68,6 +73,41 @@
%endmacro %endmacro
%endif %endif
;;; macho64 register assignments and prologue/epilogue macros.
;;; arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9; scratch registers are r11, r10
;;; and r12..r15.
;;; tmp.w and tmp.b are the 32-bit (r11d) and 8-bit (r11b) views of tmp (r11).
;;; FUNC_SAVE pushes r12, r13, r14, r15 (the registers marked "must be saved
;;; and restored"); FUNC_RESTORE pops them in the reverse order.
;;; func(x) expands to a plain label "x:".
;;; NOTE(review): PS/LOG_PS look like pointer size in bytes and its log2 - confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -189,8 +229,8 @@ section .text
%define xp5 ymm6 %define xp5 ymm6
align 16 align 16
global gf_5vect_dot_prod_avx2:function global GF_5VECT_DOT_PROD_AVX2:function
func(gf_5vect_dot_prod_avx2) func(GF_5VECT_DOT_PROD_AVX2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -320,4 +360,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199 slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
%else
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Temporaries. tmp and tmp2 use r11 and r10 and are not pushed by FUNC_SAVE.
%define tmp r11
%define tmp2 r10
; tmp3..tmp6 live in r12-r15, which FUNC_SAVE / FUNC_RESTORE below preserve.
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS is 8 and LOG_PS is 3 (8 == 1 << 3).
; NOTE(review): presumably pointer size in bytes and its log2 -- confirm against uses.
%define PS 8
%define LOG_PS 3
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE pushes the four registers that back tmp3..tmp6.
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; FUNC_RESTORE pops the same registers in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -184,8 +222,8 @@ section .text
%define xp5 xmm6 %define xp5 xmm6
align 16 align 16
global gf_5vect_dot_prod_sse:function global GF_5VECT_DOT_PROD_SSE:function
func(gf_5vect_dot_prod_sse) func(GF_5VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -309,4 +347,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_5vect_dot_prod_sse, 00, 03, 0065 slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
%else
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Temporaries. tmp and tmp2 use r11 and r10 and are not pushed by FUNC_SAVE.
%define tmp r11
%define tmp2 r10
; tmp3..tmp6 live in r12-r15, which FUNC_SAVE / FUNC_RESTORE below preserve.
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS is 8 and LOG_PS is 3 (8 == 1 << 3).
; NOTE(review): presumably pointer size in bytes and its log2 -- confirm against uses.
%define PS 8
%define LOG_PS 3
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE pushes the four registers that back tmp3..tmp6.
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; FUNC_RESTORE pops the same registers in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp6 xmm7 %define xp6 xmm7
align 16 align 16
global gf_6vect_dot_prod_avx:function global GF_6VECT_DOT_PROD_AVX:function
func(gf_6vect_dot_prod_avx) func(GF_6VECT_DOT_PROD_AVX)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_6vect_dot_prod_avx, 02, 03, 0195 slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
%else
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -68,6 +73,41 @@
%endmacro %endmacro
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Temporaries. tmp.w and tmp.b are the 32-bit and 8-bit views of tmp (r11).
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
; tmp3..tmp6 live in r12-r15, which FUNC_SAVE / FUNC_RESTORE below preserve.
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS is 8 and LOG_PS is 3 (8 == 1 << 3).
; NOTE(review): presumably pointer size in bytes and its log2 -- confirm against uses.
%define PS 8
%define LOG_PS 3
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE pushes the four registers that back tmp3..tmp6.
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; FUNC_RESTORE pops the same registers in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
%else
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -66,6 +71,39 @@
%endmacro %endmacro
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Temporaries. tmp and tmp2 use r11 and r10 and are not pushed by FUNC_SAVE.
%define tmp r11
%define tmp2 r10
; tmp3..tmp6 live in r12-r15, which FUNC_SAVE / FUNC_RESTORE below preserve.
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
; PS is 8 and LOG_PS is 3 (8 == 1 << 3).
; NOTE(review): presumably pointer size in bytes and its log2 -- confirm against uses.
%define PS 8
%define LOG_PS 3
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE pushes the four registers that back tmp3..tmp6.
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; FUNC_RESTORE pops the same registers in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -182,8 +220,8 @@ section .text
%define xp6 xmm7 %define xp6 xmm7
align 16 align 16
global gf_6vect_dot_prod_sse:function global GF_6VECT_DOT_PROD_SSE:function
func(gf_6vect_dot_prod_sse) func(GF_6VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_6vect_dot_prod_sse, 00, 03, 0066 slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%else
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -51,6 +56,24 @@
%define FUNC_RESTORE %define FUNC_RESTORE
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3 is bound to r9, the same register as arg5 above, so the two names
; cannot hold independent values at the same time.
%define tmp3 r9
%define return rax
; NOTE(review): PS is presumably the pointer size in bytes -- confirm against uses.
%define PS 8
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE and FUNC_RESTORE are defined empty, so they expand to nothing:
; no register is pushed or popped for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -121,8 +144,8 @@ section .text
%define xp xmm2 %define xp xmm2
align 16 align 16
global gf_vect_dot_prod_avx:function global GF_VECT_DOT_PROD_AVX:function
func(gf_vect_dot_prod_avx) func(GF_VECT_DOT_PROD_AVX)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -195,4 +218,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_vect_dot_prod_avx, 02, 03, 0061 slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%else
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -53,6 +58,26 @@
%define FUNC_RESTORE %define FUNC_RESTORE
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is the order
; the System V AMD64 calling convention uses for integer arguments.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; tmp.w and tmp.b are the 32-bit and 8-bit views of tmp (r11).
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
; tmp3 is bound to r9, the same register as arg5 above, so the two names
; cannot hold independent values at the same time.
%define tmp3 r9
%define return rax
; NOTE(review): PS is presumably the pointer size in bytes -- confirm against uses.
%define PS 8
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE and FUNC_RESTORE are defined empty, so they expand to nothing:
; no register is pushed or popped for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -126,8 +151,8 @@ section .text
%define xp ymm2 %define xp ymm2
align 16 align 16
global gf_vect_dot_prod_avx2:function global GF_VECT_DOT_PROD_AVX2:function
func(gf_vect_dot_prod_avx2) func(GF_VECT_DOT_PROD_AVX2)
FUNC_SAVE FUNC_SAVE
sub len, 32 sub len, 32
jl .return_fail jl .return_fail
@ -200,4 +225,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_vect_dot_prod_avx2, 04, 03, 0190 slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%else
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -50,6 +55,23 @@
%define FUNC_RESTORE %define FUNC_RESTORE
%endif %endif
; macho64 build: bind the generic names used by the routine to registers.
; Only arg0..arg4 are defined here (rdi, rsi, rdx, rcx, r8); there is no arg5.
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
; Temporaries use r11, r10 and r9.
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
; NOTE(review): PS is presumably the pointer size in bytes -- confirm against uses.
%define PS 8
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE and FUNC_RESTORE are defined empty, so they expand to nothing:
; no register is pushed or popped for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64 %ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -121,8 +143,8 @@ section .text
%define xp xmm2 %define xp xmm2
align 16 align 16
global gf_vect_dot_prod_sse:function global GF_VECT_DOT_PROD_SSE:function
func(gf_vect_dot_prod_sse) func(GF_VECT_DOT_PROD_SSE)
FUNC_SAVE FUNC_SAVE
sub len, 16 sub len, 16
jl .return_fail jl .return_fail
@ -192,4 +214,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_vect_dot_prod_sse, 00, 03, 0060 slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%else
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -46,6 +51,19 @@
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
; macho64 branch of the output-format conditional: bind the generic names to
; registers. arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is
; the order the System V AMD64 calling convention uses for integer arguments.
%elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE and FUNC_RESTORE are defined empty, so they expand to nothing:
; no register is pushed or popped for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64 %elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -111,8 +129,8 @@ section .text
%define xtmp2c xmm7 %define xtmp2c xmm7
align 16 align 16
global gf_vect_mul_avx:function global GF_VECT_MUL_AVX:function
func(gf_vect_mul_avx) func(GF_VECT_MUL_AVX)
FUNC_SAVE FUNC_SAVE
mov pos, 0 mov pos, 0
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -169,4 +187,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_vect_mul_avx, 01, 02, 0036 slversion GF_VECT_MUL_AVX, 01, 02, 0036

View File

@ -32,6 +32,11 @@
;;; ;;;
;;; Author: Gregory Tucker ;;; Author: Gregory Tucker
; Name under which this routine is exported, selected by output format.
; Every format other than macho64 uses the plain name; macho64 gets the same
; name with a leading underscore.
; NOTE(review): presumably the Mach-O C symbol convention -- confirm.
%ifnidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%else
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64 %ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi %define arg0 rdi
@ -46,6 +51,19 @@
%define FUNC_SAVE %define FUNC_SAVE
%define FUNC_RESTORE %define FUNC_RESTORE
; macho64 branch of the output-format conditional: bind the generic names to
; registers. arg0..arg5 follow the order rdi, rsi, rdx, rcx, r8, r9, which is
; the order the System V AMD64 calling convention uses for integer arguments.
%elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
; func(name) expands to the label "name:".
%define func(x) x:
; FUNC_SAVE and FUNC_RESTORE are defined empty, so they expand to nothing:
; no register is pushed or popped for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64 %elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx %define arg0 rcx
%define arg1 rdx %define arg1 rdx
@ -112,8 +130,8 @@ section .text
align 16 align 16
global gf_vect_mul_sse:function global GF_VECT_MUL_SSE:function
func(gf_vect_mul_sse) func(GF_VECT_MUL_SSE)
FUNC_SAVE FUNC_SAVE
mov pos, 0 mov pos, 0
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@ -175,4 +193,4 @@ global %1_slver
db 0x%3, 0x%2 db 0x%3, 0x%2
%endmacro %endmacro
;;; func core, ver, snum ;;; func core, ver, snum
slversion gf_vect_mul_sse, 00, 02, 0034 slversion GF_VECT_MUL_SSE, 00, 02, 0034

View File

@ -23,7 +23,7 @@ func (s *MySuite) TestPiping(c *C) {
// Run the command on each directory // Run the command on each directory
for _, dir := range dirs { for _, dir := range dirs {
// find $DIR -type f # Find all files // find $DIR -type f # Find all files
ls := exec.Command("ls", dir, "-l") ls := exec.Command("ls", "-l", dir)
// | sort -t. -k2 # Sort by file extension // | sort -t. -k2 # Sort by file extension
sort := exec.Command("sort", "-t.", "-k2") sort := exec.Command("sort", "-t.", "-k2")