mirror of
				https://github.com/minio/minio.git
				synced 2025-10-30 00:05:02 -04:00 
			
		
		
		
	Merge with Intel ISAL changes from github.com/minio-io/isal
- These changes bring in a much-needed Mac OS X port of the
    Intel ISA-L library
  - At the current stage, the Mac OS X part of the code is
    considered beta
  - pkg/cpu now supports OS X
  - pkg/checksum/crc32c is still WIP; the rest of the packages
    have been validated
			
			
This commit is contained in:
		
							parent
							
								
									c82d2b95d7
								
							
						
					
					
						commit
						f347a1e590
					
				
							
								
								
									
										5
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Makefile
									
									
									
									
									
								
							| @ -14,9 +14,6 @@ build-erasure: | ||||
| 	@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib | ||||
| 	@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure | ||||
| 
 | ||||
| build-signify: | ||||
| 	@$(MAKE) $(MAKE_OPTIONS) -C pkg/signify | ||||
| 
 | ||||
| build-cpu: | ||||
| 	@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu | ||||
| 
 | ||||
| @ -53,7 +50,7 @@ build-storage-append: | ||||
| build-storage-encoded: | ||||
| 	@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage | ||||
| 
 | ||||
| cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512 | ||||
| cover: build-erasure build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512 | ||||
| 	@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway | ||||
| 
 | ||||
| install: build-erasure | ||||
|  | ||||
| @ -168,7 +168,7 @@ continue_block: | ||||
| 
 | ||||
| 	## branch into array | ||||
| 	lea	jump_table(%rip), bufp | ||||
| 	movzxw  (bufp, %rax, 2), len | ||||
| 	movzwq  (bufp, %rax, 2), len | ||||
| 	offset=crc_array-jump_table | ||||
| 	lea     offset(bufp, len, 1), bufp | ||||
| 	jmp     *bufp | ||||
| @ -194,18 +194,22 @@ full_block: | ||||
| crc_array: | ||||
| 	i=128 | ||||
| .rept 128-1 | ||||
| #if !defined(__clang__) | ||||
| .altmacro | ||||
| LABEL crc_ %i | ||||
| .noaltmacro | ||||
| #endif | ||||
| 	crc32q   -i*8(block_0), crc_init | ||||
| 	crc32q   -i*8(block_1), crc1 | ||||
| 	crc32q   -i*8(block_2), crc2 | ||||
| 	i=(i-1) | ||||
| .endr | ||||
| 
 | ||||
| #if !defined(__clang__) | ||||
| .altmacro | ||||
| LABEL crc_ %i | ||||
| .noaltmacro | ||||
| #endif | ||||
| 	crc32q   -i*8(block_0), crc_init | ||||
| 	crc32q   -i*8(block_1), crc1 | ||||
| # SKIP  crc32  -i*8(block_2), crc2 ; Don't do this one yet
 | ||||
|  | ||||
| @ -14,9 +14,18 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef __APPLE__ | ||||
| #define HAS_SSE _has_sse41 | ||||
| #define HAS_AVX _has_avx | ||||
| #define HAS_AVX2 _has_avx2 | ||||
| #else | ||||
| #define HAS_SSE has_sse41 | ||||
| #define HAS_AVX has_avx | ||||
| #define HAS_AVX2 has_avx2 | ||||
| #endif | ||||
| 
 | ||||
| 	.file	"cpufeatures.c" | ||||
| 	.text | ||||
| 	.type	cpuid, @function
 | ||||
| cpuid: | ||||
| .LFB2: | ||||
| 	.cfi_startproc | ||||
| @ -56,10 +65,8 @@ cpuid: | ||||
| 	ret | ||||
| 	.cfi_endproc | ||||
| .LFE2: | ||||
| 	.size	cpuid, .-cpuid | ||||
| 	.globl	has_sse41
 | ||||
| 	.type	has_sse41, @function
 | ||||
| has_sse41: | ||||
| 	.globl	HAS_SSE
 | ||||
| HAS_SSE: | ||||
| .LFB3: | ||||
| 	.cfi_startproc | ||||
| 	pushq	%rbp | ||||
| @ -82,10 +89,8 @@ has_sse41: | ||||
| 	ret | ||||
| 	.cfi_endproc | ||||
| .LFE3: | ||||
| 	.size	has_sse41, .-has_sse41 | ||||
| 	.globl	has_avx
 | ||||
| 	.type	has_avx, @function
 | ||||
| has_avx: | ||||
| 	.globl	HAS_AVX
 | ||||
| HAS_AVX: | ||||
| .LFB4: | ||||
| 	.cfi_startproc | ||||
| 	pushq	%rbp | ||||
| @ -108,10 +113,8 @@ has_avx: | ||||
| 	ret | ||||
| 	.cfi_endproc | ||||
| .LFE4: | ||||
| 	.size	has_avx, .-has_avx | ||||
| 	.globl	has_avx2
 | ||||
| 	.type	has_avx2, @function
 | ||||
| has_avx2: | ||||
| 	.globl	HAS_AVX2
 | ||||
| HAS_AVX2: | ||||
| .LFB5: | ||||
| 	.cfi_startproc | ||||
| 	pushq	%rbp | ||||
| @ -133,7 +136,3 @@ has_avx2: | ||||
| 	.cfi_def_cfa 7, 8 | ||||
| 	ret | ||||
| 	.cfi_endproc | ||||
| .LFE5: | ||||
| 	.size	has_avx2, .-has_avx2 | ||||
| 	.ident	"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2" | ||||
| 	.section	.note.GNU-stack,"",@progbits
 | ||||
|  | ||||
| @ -1,12 +1,17 @@ | ||||
| all: build test | ||||
| .PHONY: all | ||||
| 
 | ||||
| SYSTEM_NAME := $(shell uname -s) | ||||
| 
 | ||||
| test: | ||||
| 	@godep go test -race -coverprofile=cover.out | ||||
| 
 | ||||
| isal/isal-l.a: | ||||
| ifeq ($(SYSTEM_NAME), Darwin) | ||||
| 	@$(MAKE) -C isal arch=osx lib | ||||
| else | ||||
| 	@$(MAKE) -C isal lib | ||||
| 
 | ||||
| endif | ||||
| build: isal/isal-l.a | ||||
| 	@godep go build | ||||
| 
 | ||||
|  | ||||
| @ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m, | ||||
| 
 | ||||
|         *source = tmp_source; | ||||
|         *target = tmp_target; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
							
								
								
									
										21
									
								
								pkg/erasure/isal/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										21
									
								
								pkg/erasure/isal/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,3 +1,22 @@ | ||||
| *.o | ||||
| *.a | ||||
| *.so | ||||
| *.so | ||||
| *~ | ||||
| *.dSYM | ||||
| erasure-code-base-test | ||||
| erasure-code-sse-test | ||||
| erasure-code-test | ||||
| gf-2vect-dot-prod-sse-test | ||||
| gf-3vect-dot-prod-sse-test | ||||
| gf-4vect-dot-prod-sse-test | ||||
| gf-5vect-dot-prod-sse-test | ||||
| gf-6vect-dot-prod-sse-test | ||||
| gf-inverse-test | ||||
| gf-vect-dot-prod-avx-test | ||||
| gf-vect-dot-prod-base-test | ||||
| gf-vect-dot-prod-sse-test | ||||
| gf-vect-dot-prod-test | ||||
| gf-vect-mul-avx-test | ||||
| gf-vect-mul-base-test | ||||
| gf-vect-mul-sse-test | ||||
| gf-vect-mul-test | ||||
|  | ||||
| @ -30,7 +30,7 @@ | ||||
| 
 | ||||
| units = src | ||||
| 
 | ||||
| default: slib | ||||
| default: lib | ||||
| 
 | ||||
| include $(foreach unit,$(units), $(unit)/Makefile) | ||||
| 
 | ||||
|  | ||||
| @ -41,7 +41,7 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| #ifndef __unix__ | ||||
| #if !defined(__unix__) && !defined(__APPLE__) | ||||
| #ifdef __MINGW32__ | ||||
| # include <_mingw.h> | ||||
| #endif | ||||
| @ -59,7 +59,7 @@ typedef unsigned char      UINT8; | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| #ifdef __unix__ | ||||
| #if defined(__unix__) || defined(__APPLE__) | ||||
| # define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) | ||||
| # define __forceinline static inline | ||||
| #else | ||||
|  | ||||
| @ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64 | ||||
| CFLAGS_icl    = -Qstd=c99 | ||||
| ARFLAGS_win64 = -out:$@ | ||||
| 
 | ||||
| # arch=osx build options
 | ||||
| ASFLAGS_osx = -f macho64 | ||||
| ARFLAGS_osx = -r $@ | ||||
| STRIP_gcc = strip -d $@ | ||||
| 
 | ||||
| # arch=mingw build options
 | ||||
| ASFLAGS_mingw = -f win64 | ||||
| ARFLAGS_mingw = cr $@ | ||||
| @ -101,9 +106,13 @@ ifeq ($(arch),win64) | ||||
|   lib_name := $(basename $(lib_name)).lib | ||||
| endif | ||||
| lsrcwin64 = $(lsrc) | ||||
| lsrcosx = $(lsrc) | ||||
| unit_testswin64 = $(unit_tests) | ||||
| unit_testsosx = $(unit_tests) | ||||
| exampleswin64 = $(examples) | ||||
| examplesosx = $(examples) | ||||
| perf_testswin64 = $(perf_tests) | ||||
| perf_testsosx = $(perf_tests) | ||||
| 
 | ||||
| # Build and run unit tests, performance tests, etc.
 | ||||
| all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests)) | ||||
| @ -199,7 +208,6 @@ perf_report: | ||||
| 	@echo Summary: | ||||
| 	-grep runtime $(rpt_name) | ||||
| 
 | ||||
| 
 | ||||
| clean: | ||||
| 	@echo Cleaning up | ||||
| 	@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name) | ||||
|  | ||||
| @ -33,6 +33,42 @@ | ||||
| %define WRT_OPT | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
| %define EC_ENCODE_DATA_SSE _ec_encode_data_sse | ||||
| %define EC_ENCODE_DATA_AVX _ec_encode_data_avx | ||||
| %define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2 | ||||
| %define GF_VECT_MUL_SSE _gf_vect_mul_sse | ||||
| %define GF_VECT_MUL_AVX _gf_vect_mul_avx | ||||
| %define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse | ||||
| %define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx | ||||
| %define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2 | ||||
| %define GF_VECT_MUL_BASE _gf_vect_mul_base | ||||
| %define EC_ENCODE_DATA_BASE _ec_encode_data_base | ||||
| %define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base | ||||
| 
 | ||||
| %define EC_ENCODE_DATA _ec_encode_data | ||||
| %define GF_VECT_MUL _gf_vect_mul | ||||
| %define GF_VECT_DOT_PROD _gf_vect_dot_prod | ||||
| 
 | ||||
| %else | ||||
| %define EC_ENCODE_DATA_SSE ec_encode_data_sse | ||||
| %define EC_ENCODE_DATA_AVX ec_encode_data_avx | ||||
| %define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2 | ||||
| %define GF_VECT_MUL_SSE gf_vect_mul_sse | ||||
| %define GF_VECT_MUL_AVX gf_vect_mul_avx | ||||
| %define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse | ||||
| %define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx | ||||
| %define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2 | ||||
| %define GF_VECT_MUL_BASE gf_vect_mul_base | ||||
| %define EC_ENCODE_DATA_BASE ec_encode_data_base | ||||
| %define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base | ||||
| 
 | ||||
| %define EC_ENCODE_DATA ec_encode_data | ||||
| %define GF_VECT_MUL gf_vect_mul | ||||
| %define GF_VECT_DOT_PROD gf_vect_dot_prod | ||||
| 
 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf32 | ||||
| 
 | ||||
| [bits 32] | ||||
| @ -51,19 +87,19 @@ default rel | ||||
| %define wrd_sz  	qword | ||||
| %define arg1		rsi | ||||
| 
 | ||||
| extern ec_encode_data_sse | ||||
| extern ec_encode_data_avx | ||||
| extern ec_encode_data_avx2 | ||||
| extern gf_vect_mul_sse | ||||
| extern gf_vect_mul_avx | ||||
| extern gf_vect_dot_prod_sse | ||||
| extern gf_vect_dot_prod_avx | ||||
| extern gf_vect_dot_prod_avx2 | ||||
| extern EC_ENCODE_DATA_SSE | ||||
| extern EC_ENCODE_DATA_AVX | ||||
| extern EC_ENCODE_DATA_AVX2 | ||||
| extern GF_VECT_MUL_SSE | ||||
| extern GF_VECT_MUL_AVX | ||||
| extern GF_VECT_DOT_PROD_SSE | ||||
| extern GF_VECT_DOT_PROD_AVX | ||||
| extern GF_VECT_DOT_PROD_AVX2 | ||||
| %endif | ||||
| 
 | ||||
| extern gf_vect_mul_base | ||||
| extern ec_encode_data_base | ||||
| extern gf_vect_dot_prod_base | ||||
| extern GF_VECT_MUL_BASE | ||||
| extern EC_ENCODE_DATA_BASE | ||||
| extern GF_VECT_DOT_PROD_BASE | ||||
| 
 | ||||
| section .data | ||||
| ;;; *_mbinit are initial values for *_dispatched; is updated on first call. | ||||
| @ -82,33 +118,33 @@ section .text | ||||
| ;;;; | ||||
| ; ec_encode_data multibinary function | ||||
| ;;;; | ||||
| global ec_encode_data:function | ||||
| global EC_ENCODE_DATA:function | ||||
| ec_encode_data_mbinit: | ||||
| 	call	ec_encode_data_dispatch_init | ||||
| 
 | ||||
| ec_encode_data: | ||||
| EC_ENCODE_DATA: | ||||
| 	jmp	wrd_sz [ec_encode_data_dispatched] | ||||
| 
 | ||||
| ec_encode_data_dispatch_init: | ||||
| 	push    arg1 | ||||
| %ifidn __OUTPUT_FORMAT__, elf32		;; 32-bit check | ||||
| 	lea     arg1, [ec_encode_data_base] | ||||
| 	lea     arg1, [EC_ENCODE_DATA_BASE] | ||||
| %else | ||||
| 	push    rax | ||||
| 	push    rbx | ||||
| 	push    rcx | ||||
| 	push    rdx | ||||
| 	lea     arg1, [ec_encode_data_base WRT_OPT] ; Default | ||||
| 	lea     arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default | ||||
| 
 | ||||
| 	mov     eax, 1 | ||||
| 	cpuid | ||||
| 	lea     rbx, [ec_encode_data_sse WRT_OPT] | ||||
| 	lea     rbx, [EC_ENCODE_DATA_SSE WRT_OPT] ; SSE4.1 variant, selected by the cmovne below when CPUID reports SSE4.1 | ||||
| 	test    ecx, FLAG_CPUID1_ECX_SSE4_1 | ||||
| 	cmovne  arg1, rbx | ||||
| 
 | ||||
| 	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | ||||
| 	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | ||||
| 	lea	rbx, [ec_encode_data_avx WRT_OPT] | ||||
| 	lea	rbx, [EC_ENCODE_DATA_AVX WRT_OPT] | ||||
| 
 | ||||
| 	jne	_done_ec_encode_data_init | ||||
| 	mov	rsi, rbx | ||||
| @ -118,7 +154,7 @@ ec_encode_data_dispatch_init: | ||||
| 	mov	eax, 7 | ||||
| 	cpuid | ||||
| 	test	ebx, FLAG_CPUID1_EBX_AVX2 | ||||
| 	lea     rbx, [ec_encode_data_avx2 WRT_OPT] | ||||
| 	lea     rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT] | ||||
| 	cmovne	rsi, rbx | ||||
| 
 | ||||
| 	;; Does it have xmm and ymm support | ||||
| @ -127,7 +163,7 @@ ec_encode_data_dispatch_init: | ||||
| 	and	eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	je	_done_ec_encode_data_init | ||||
| 	lea     rsi, [ec_encode_data_sse WRT_OPT] | ||||
| 	lea     rsi, [EC_ENCODE_DATA_SSE WRT_OPT] | ||||
| 
 | ||||
| _done_ec_encode_data_init: | ||||
| 	pop     rdx | ||||
| @ -142,30 +178,30 @@ _done_ec_encode_data_init: | ||||
| ;;;; | ||||
| ; gf_vect_mul multibinary function | ||||
| ;;;; | ||||
| global gf_vect_mul:function | ||||
| global GF_VECT_MUL:function | ||||
| gf_vect_mul_mbinit: | ||||
| 	call    gf_vect_mul_dispatch_init | ||||
| 
 | ||||
| gf_vect_mul: | ||||
| GF_VECT_MUL: | ||||
| 	jmp	wrd_sz [gf_vect_mul_dispatched] | ||||
| 
 | ||||
| gf_vect_mul_dispatch_init: | ||||
| 	push    arg1 | ||||
| %ifidn __OUTPUT_FORMAT__, elf32		;; 32-bit check | ||||
| 	lea     arg1, [gf_vect_mul_base] | ||||
| 	lea     arg1, [GF_VECT_MUL_BASE] | ||||
| %else | ||||
| 	push    rax | ||||
| 	push    rbx | ||||
| 	push    rcx | ||||
| 	push    rdx | ||||
| 	lea     arg1, [gf_vect_mul_base WRT_OPT] ; Default | ||||
| 	lea     arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default | ||||
| 
 | ||||
| 	mov     eax, 1 | ||||
| 	cpuid | ||||
| 	test    ecx, FLAG_CPUID1_ECX_SSE4_2 | ||||
| 	lea     rbx, [gf_vect_mul_sse WRT_OPT] | ||||
| 	je	_done_gf_vect_mul_dispatch_init | ||||
| 	mov  	arg1, rbx | ||||
| 	lea     rbx, [GF_VECT_MUL_SSE WRT_OPT] | ||||
| 	je      _done_gf_vect_mul_dispatch_init | ||||
| 	mov     arg1, rbx | ||||
| 
 | ||||
| 	;; Try for AVX | ||||
| 	and     ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX) | ||||
| @ -178,49 +214,49 @@ gf_vect_mul_dispatch_init: | ||||
| 	and     eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	cmp     eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	jne     _done_gf_vect_mul_dispatch_init | ||||
| 	lea     arg1, [gf_vect_mul_avx WRT_OPT] | ||||
| 	lea     arg1, [GF_VECT_MUL_AVX WRT_OPT] | ||||
| 
 | ||||
| _done_gf_vect_mul_dispatch_init: | ||||
| 	pop     rdx | ||||
| 	pop     rcx | ||||
| 	pop     rbx | ||||
| 	pop     rax | ||||
| %endif			;; END 32-bit check | ||||
| 	mov     [gf_vect_mul_dispatched], arg1 | ||||
| 	pop     arg1 | ||||
| 	ret | ||||
|         pop     rdx | ||||
|         pop     rcx | ||||
|         pop     rbx | ||||
|         pop     rax | ||||
| %endif  ;; END 32-bit check | ||||
|         mov     [gf_vect_mul_dispatched], arg1 | ||||
|         pop     arg1 | ||||
|         ret | ||||
| 
 | ||||
| 
 | ||||
| ;;;; | ||||
| ; gf_vect_dot_prod multibinary function | ||||
| ;;;; | ||||
| global gf_vect_dot_prod:function | ||||
| global GF_VECT_DOT_PROD:function | ||||
| gf_vect_dot_prod_mbinit: | ||||
| 	call    gf_vect_dot_prod_dispatch_init | ||||
| 
 | ||||
| gf_vect_dot_prod: | ||||
| GF_VECT_DOT_PROD: | ||||
| 	jmp     wrd_sz [gf_vect_dot_prod_dispatched] | ||||
| 
 | ||||
| gf_vect_dot_prod_dispatch_init: | ||||
| 	push    arg1 | ||||
| %ifidn __OUTPUT_FORMAT__, elf32         ;; 32-bit check | ||||
| 	lea     arg1, [gf_vect_dot_prod_base] | ||||
| 	lea     arg1, [GF_VECT_DOT_PROD_BASE] | ||||
| %else | ||||
| 	push	rax | ||||
| 	push	rbx | ||||
| 	push	rcx | ||||
| 	push	rdx | ||||
| 	lea     arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default | ||||
| 	lea     arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default | ||||
| 
 | ||||
| 	mov     eax, 1 | ||||
| 	cpuid | ||||
| 	lea     rbx, [gf_vect_dot_prod_sse WRT_OPT] | ||||
| 	lea     rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT] | ||||
| 	test    ecx, FLAG_CPUID1_ECX_SSE4_1 | ||||
| 	cmovne  arg1, rbx | ||||
| 
 | ||||
| 	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | ||||
| 	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) | ||||
| 	lea     rbx, [gf_vect_dot_prod_avx WRT_OPT] | ||||
| 	lea     rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT] | ||||
| 
 | ||||
| 	jne     _done_gf_vect_dot_prod_init | ||||
| 	mov	rsi, rbx | ||||
| @ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init: | ||||
| 	mov	eax, 7 | ||||
| 	cpuid | ||||
| 	test	ebx, FLAG_CPUID1_EBX_AVX2 | ||||
| 	lea     rbx, [gf_vect_dot_prod_avx2 WRT_OPT] | ||||
| 	lea     rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT] | ||||
| 	cmovne	rsi, rbx | ||||
| 
 | ||||
| 	;; Does it have xmm and ymm support | ||||
| @ -238,8 +274,8 @@ gf_vect_dot_prod_dispatch_init: | ||||
| 	xgetbv | ||||
| 	and	eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM | ||||
| 	je	_done_gf_vect_dot_prod_init | ||||
| 	lea     rsi, [gf_vect_dot_prod_sse WRT_OPT] | ||||
| 	je      _done_gf_vect_dot_prod_init | ||||
| 	lea     rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT] | ||||
| 
 | ||||
| _done_gf_vect_dot_prod_init: | ||||
| 	pop     rdx | ||||
| @ -261,6 +297,6 @@ global %1_slver | ||||
| %endmacro | ||||
| 
 | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion ec_encode_data,	00,   02,  0133 | ||||
| slversion gf_vect_mul,		00,   02,  0134 | ||||
| slversion gf_vect_dot_prod,	00,   01,  0138 | ||||
| slversion EC_ENCODE_DATA,	00,   02,  0133 | ||||
| slversion GF_VECT_MUL,		00,   02,  0134 | ||||
| slversion GF_VECT_DOT_PROD,	00,   01,  0138 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -58,6 +63,31 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -143,9 +173,8 @@ section .text | ||||
| %define xp2    xmm3 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_2vect_dot_prod_avx:function | ||||
| 
 | ||||
| func(gf_2vect_dot_prod_avx) | ||||
| global GF_2VECT_DOT_PROD_AVX:function | ||||
| func(GF_2VECT_DOT_PROD_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -231,4 +260,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_2vect_dot_prod_avx, 02,  03,  0191 | ||||
| slversion GF_2VECT_DOT_PROD_AVX, 02,  03,  0191 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -60,6 +65,33 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -149,9 +181,8 @@ section .text | ||||
| %define xp2    ymm3 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_2vect_dot_prod_avx2:function | ||||
| 
 | ||||
| func(gf_2vect_dot_prod_avx2) | ||||
| global GF_2VECT_DOT_PROD_AVX2:function | ||||
| func(GF_2VECT_DOT_PROD_AVX2) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 32 | ||||
| 	jl	.return_fail | ||||
| @ -243,4 +274,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                   core, ver, snum | ||||
| slversion gf_2vect_dot_prod_avx2, 04,  03,  0196 | ||||
| slversion GF_2VECT_DOT_PROD_AVX2, 04,  03,  0196 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -58,6 +63,31 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -144,9 +174,8 @@ section .text | ||||
| %define xp2    xmm3 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_2vect_dot_prod_sse:function | ||||
| 
 | ||||
| func(gf_2vect_dot_prod_sse) | ||||
| global GF_2VECT_DOT_PROD_SSE:function | ||||
| func(GF_2VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -233,4 +262,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_2vect_dot_prod_sse, 00,  02,  0062 | ||||
| slversion GF_2VECT_DOT_PROD_SSE, 00,  02,  0062 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -60,6 +65,33 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -157,8 +189,8 @@ section .text | ||||
| %define xp3    xmm4 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_3vect_dot_prod_avx:function | ||||
| func(gf_3vect_dot_prod_avx) | ||||
| global GF_3VECT_DOT_PROD_AVX:function | ||||
| func(GF_3VECT_DOT_PROD_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -255,4 +287,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_3vect_dot_prod_avx, 02,  03,  0192 | ||||
| slversion GF_3VECT_DOT_PROD_AVX, 02,  03,  0192 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -62,6 +67,35 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -162,8 +196,8 @@ section .text | ||||
| %define xp3    ymm4 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_3vect_dot_prod_avx2:function | ||||
| func(gf_3vect_dot_prod_avx2) | ||||
| global GF_3VECT_DOT_PROD_AVX2:function | ||||
| func(GF_3VECT_DOT_PROD_AVX2) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 32 | ||||
| 	jl	.return_fail | ||||
| @ -268,4 +302,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                   core, ver, snum | ||||
| slversion gf_3vect_dot_prod_avx2, 04,  03,  0197 | ||||
| slversion GF_3VECT_DOT_PROD_AVX2, 04,  03,  0197 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -60,6 +65,33 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -157,8 +189,8 @@ section .text | ||||
| %define xp3    xmm4 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_3vect_dot_prod_sse:function | ||||
| func(gf_3vect_dot_prod_sse) | ||||
| global GF_3VECT_DOT_PROD_SSE:function | ||||
| func(GF_3VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -256,4 +288,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_3vect_dot_prod_sse, 00,  03,  0063 | ||||
| slversion GF_3VECT_DOT_PROD_SSE, 00,  03,  0063 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -182,8 +220,8 @@ section .text | ||||
| %define xp4    xmm5 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_4vect_dot_prod_avx:function | ||||
| func(gf_4vect_dot_prod_avx) | ||||
| global GF_4VECT_DOT_PROD_AVX:function | ||||
| func(GF_4VECT_DOT_PROD_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -293,4 +331,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_4vect_dot_prod_avx, 00,  02,  0064 | ||||
| slversion GF_4VECT_DOT_PROD_AVX, 00,  02,  0064 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -68,6 +73,41 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -187,8 +227,8 @@ section .text | ||||
| %define xp4    ymm5 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_4vect_dot_prod_avx2:function | ||||
| func(gf_4vect_dot_prod_avx2) | ||||
| global GF_4VECT_DOT_PROD_AVX2:function | ||||
| func(GF_4VECT_DOT_PROD_AVX2) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 32 | ||||
| 	jl	.return_fail | ||||
| @ -302,4 +342,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                   core, ver, snum | ||||
| slversion gf_4vect_dot_prod_avx2, 04,  03,  0064 | ||||
| slversion GF_4VECT_DOT_PROD_AVX2, 04,  03,  0064 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -182,8 +220,8 @@ section .text | ||||
| %define xp4    xmm5 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_4vect_dot_prod_sse:function | ||||
| func(gf_4vect_dot_prod_sse) | ||||
| global GF_4VECT_DOT_PROD_SSE:function | ||||
| func(GF_4VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -293,4 +331,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_4vect_dot_prod_sse, 00,  03,  0064 | ||||
| slversion GF_4VECT_DOT_PROD_SSE, 00,  03,  0064 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -68,6 +73,41 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -189,8 +229,8 @@ section .text | ||||
| %define xp5    ymm6 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_5vect_dot_prod_avx2:function | ||||
| func(gf_5vect_dot_prod_avx2) | ||||
| global GF_5VECT_DOT_PROD_AVX2:function | ||||
| func(GF_5VECT_DOT_PROD_AVX2) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 32 | ||||
| 	jl	.return_fail | ||||
| @ -320,4 +360,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_5vect_dot_prod_avx2, 04,  03,  0199 | ||||
| slversion GF_5VECT_DOT_PROD_AVX2, 04,  03,  0199 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -184,8 +222,8 @@ section .text | ||||
| %define xp5    xmm6 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_5vect_dot_prod_sse:function | ||||
| func(gf_5vect_dot_prod_sse) | ||||
| global GF_5VECT_DOT_PROD_SSE:function | ||||
| func(GF_5VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -309,4 +347,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_5vect_dot_prod_sse, 00,  03,  0065 | ||||
| slversion GF_5VECT_DOT_PROD_SSE, 00,  03,  0065 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -182,8 +220,8 @@ section .text | ||||
| %define xp6    xmm7 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_6vect_dot_prod_avx:function | ||||
| func(gf_6vect_dot_prod_avx) | ||||
| global GF_6VECT_DOT_PROD_AVX:function | ||||
| func(GF_6VECT_DOT_PROD_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -320,4 +358,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_6vect_dot_prod_avx, 02,  03,  0195 | ||||
| slversion GF_6VECT_DOT_PROD_AVX, 02,  03,  0195 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -68,6 +73,41 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -66,6 +71,39 @@ | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r13		; must be saved and restored | ||||
|  %define tmp4  r12		; must be saved and restored | ||||
|  %define tmp5  r14		; must be saved and restored | ||||
|  %define tmp6  r15		; must be saved and restored | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define LOG_PS 3 | ||||
| 
 | ||||
|  %define func(x) x: | ||||
|  %macro FUNC_SAVE 0 | ||||
| 	push	r12 | ||||
| 	push	r13 | ||||
| 	push	r14 | ||||
| 	push	r15 | ||||
|  %endmacro | ||||
|  %macro FUNC_RESTORE 0 | ||||
| 	pop	r15 | ||||
| 	pop	r14 | ||||
| 	pop	r13 | ||||
| 	pop	r12 | ||||
|  %endmacro | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -182,8 +220,8 @@ section .text | ||||
| %define xp6    xmm7 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_6vect_dot_prod_sse:function | ||||
| func(gf_6vect_dot_prod_sse) | ||||
| global GF_6VECT_DOT_PROD_SSE:function | ||||
| func(GF_6VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -320,4 +358,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_6vect_dot_prod_sse, 00,  03,  0066 | ||||
| slversion GF_6VECT_DOT_PROD_SSE, 00,  03,  0066 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx | ||||
| %else | ||||
|  %define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -51,6 +56,24 @@ | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define func(x) x: | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -121,8 +144,8 @@ section .text | ||||
| %define xp     xmm2 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_vect_dot_prod_avx:function | ||||
| func(gf_vect_dot_prod_avx) | ||||
| global GF_VECT_DOT_PROD_AVX:function | ||||
| func(GF_VECT_DOT_PROD_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -195,4 +218,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                 core, ver, snum | ||||
| slversion gf_vect_dot_prod_avx, 02,  03,  0061 | ||||
| slversion GF_VECT_DOT_PROD_AVX, 02,  03,  0061 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2 | ||||
| %else | ||||
|  %define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2 | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -53,6 +58,26 @@ | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp.w r11d | ||||
|  %define tmp.b r11b | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define func(x) x: | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -126,8 +151,8 @@ section .text | ||||
| %define xp     ymm2 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_vect_dot_prod_avx2:function | ||||
| func(gf_vect_dot_prod_avx2) | ||||
| global GF_VECT_DOT_PROD_AVX2:function | ||||
| func(GF_VECT_DOT_PROD_AVX2) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 32 | ||||
| 	jl	.return_fail | ||||
| @ -200,4 +225,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                  core, ver, snum | ||||
| slversion gf_vect_dot_prod_avx2, 04,  03,  0190 | ||||
| slversion GF_VECT_DOT_PROD_AVX2, 04,  03,  0190 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse | ||||
| %else | ||||
|  %define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -50,6 +55,23 @@ | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
| 
 | ||||
|  %define tmp   r11 | ||||
|  %define tmp2  r10 | ||||
|  %define tmp3  r9 | ||||
|  %define return rax | ||||
|  %define PS 8 | ||||
|  %define func(x) x: | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0   rcx | ||||
|  %define arg1   rdx | ||||
| @ -121,8 +143,8 @@ section .text | ||||
| %define xp     xmm2 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_vect_dot_prod_sse:function | ||||
| func(gf_vect_dot_prod_sse) | ||||
| global GF_VECT_DOT_PROD_SSE:function | ||||
| func(GF_VECT_DOT_PROD_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	sub	len, 16 | ||||
| 	jl	.return_fail | ||||
| @ -192,4 +214,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func                 core, ver, snum | ||||
| slversion gf_vect_dot_prod_sse, 00,  03,  0060 | ||||
| slversion GF_VECT_DOT_PROD_SSE, 00,  03,  0060 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_VECT_MUL_AVX _gf_vect_mul_avx | ||||
| %else | ||||
|  %define GF_VECT_MUL_AVX gf_vect_mul_avx | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -46,6 +51,19 @@ | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| 
 | ||||
| %elifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
|  %define tmp   r11 | ||||
|  %define return rax | ||||
|  %define func(x) x: | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| 
 | ||||
| %elifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0  rcx | ||||
|  %define arg1  rdx | ||||
| @ -111,8 +129,8 @@ section .text | ||||
| %define xtmp2c xmm7 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_vect_mul_avx:function | ||||
| func(gf_vect_mul_avx) | ||||
| global GF_VECT_MUL_AVX:function | ||||
| func(GF_VECT_MUL_AVX) | ||||
| 	FUNC_SAVE | ||||
| 	mov	pos, 0 | ||||
| 	vmovdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte | ||||
| @ -169,4 +187,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func             core, ver, snum | ||||
| slversion gf_vect_mul_avx, 01,   02,  0036 | ||||
| slversion GF_VECT_MUL_AVX, 01,   02,  0036 | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| ;  Copyright(c) 2011-2014 Intel Corporation All rights reserved. | ||||
| ; | ||||
| ;  Redistribution and use in source and binary forms, with or without | ||||
| ;  modification, are permitted provided that the following conditions  | ||||
| ;  modification, are permitted provided that the following conditions | ||||
| ;  are met: | ||||
| ;    * Redistributions of source code must retain the above copyright | ||||
| ;      notice, this list of conditions and the following disclaimer. | ||||
| @ -32,6 +32,11 @@ | ||||
| ;;; | ||||
| ;;; Author: Gregory Tucker | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define GF_VECT_MUL_SSE _gf_vect_mul_sse | ||||
| %else | ||||
|  %define GF_VECT_MUL_SSE gf_vect_mul_sse | ||||
| %endif | ||||
| 
 | ||||
| %ifidn __OUTPUT_FORMAT__, elf64 | ||||
|  %define arg0  rdi | ||||
| @ -46,6 +51,19 @@ | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| 
 | ||||
| %elifidn __OUTPUT_FORMAT__, macho64 | ||||
|  %define arg0  rdi | ||||
|  %define arg1  rsi | ||||
|  %define arg2  rdx | ||||
|  %define arg3  rcx | ||||
|  %define arg4  r8 | ||||
|  %define arg5  r9 | ||||
|  %define tmp   r11 | ||||
|  %define return rax | ||||
|  %define func(x) x: | ||||
|  %define FUNC_SAVE | ||||
|  %define FUNC_RESTORE | ||||
| 
 | ||||
| %elifidn __OUTPUT_FORMAT__, win64 | ||||
|  %define arg0  rcx | ||||
|  %define arg1  rdx | ||||
| @ -112,8 +130,8 @@ section .text | ||||
| 
 | ||||
| 
 | ||||
| align 16 | ||||
| global gf_vect_mul_sse:function | ||||
| func(gf_vect_mul_sse) | ||||
| global GF_VECT_MUL_SSE:function | ||||
| func(GF_VECT_MUL_SSE) | ||||
| 	FUNC_SAVE | ||||
| 	mov	pos, 0 | ||||
| 	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte | ||||
| @ -175,4 +193,4 @@ global %1_slver | ||||
| 	db 0x%3, 0x%2 | ||||
| %endmacro | ||||
| ;;;       func        core, ver, snum | ||||
| slversion gf_vect_mul_sse, 00,   02,  0034 | ||||
| slversion GF_VECT_MUL_SSE, 00,   02,  0034 | ||||
|  | ||||
| @ -23,7 +23,7 @@ func (s *MySuite) TestPiping(c *C) { | ||||
| 	// Run the command on each directory | ||||
| 	for _, dir := range dirs { | ||||
| 		// find $DIR -type f # Find all files | ||||
| 		ls := exec.Command("ls", dir, "-l") | ||||
| 		ls := exec.Command("ls", "-l", dir) | ||||
| 
 | ||||
| 		// | sort -t. -k2 # Sort by file extension | ||||
| 		sort := exec.Command("sort", "-t.", "-k2") | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user