Merge with Intel ISAL changes from github.com/minio-io/isal

- These changes bring in a much-needed Mac OS X port of the
    Intel ISAL library
  - At the current stage the Mac OS X part of the code is
    considered beta
  - pkg/cpu now supports OS X
  - pkg/checksum/crc32c is still WIP; the rest of the packages
    have been validated
This commit is contained in:
Harshavardhana
2015-01-10 22:50:51 -08:00
parent c82d2b95d7
commit f347a1e590
31 changed files with 864 additions and 153 deletions

View File

@@ -1,12 +1,17 @@
# Top-level build: compile the bundled ISA-L static library, then build and
# test the Go package through godep.
all: build test

# None of these goals produce a file of the same name; declaring them phony
# keeps a stray file or directory called "test" or "build" from making the
# goal look up to date.
.PHONY: all build test

# Host kernel name, evaluated once at parse time.
SYSTEM_NAME := $(shell uname -s)

# The library is a prerequisite here as well as for "build", so "make test"
# on its own (or "make -j") never starts before the library rule has run.
test: isal/isal-l.a
	@godep go test -race -coverprofile=cover.out

# Darwin hosts pass arch=osx to the ISA-L makefile; every other host uses
# that makefile's default settings.
isal/isal-l.a:
ifeq ($(SYSTEM_NAME), Darwin)
	@$(MAKE) -C isal arch=osx lib
else
	@$(MAKE) -C isal lib
endif

build: isal/isal-l.a
	@godep go build

View File

@@ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m,
*source = tmp_source;
*target = tmp_target;
return 0;
}
/*

View File

@@ -1,3 +1,22 @@
*.o
*.a
*.so
*.so
*~
*.dSYM
erasure-code-base-test
erasure-code-sse-test
erasure-code-test
gf-2vect-dot-prod-sse-test
gf-3vect-dot-prod-sse-test
gf-4vect-dot-prod-sse-test
gf-5vect-dot-prod-sse-test
gf-6vect-dot-prod-sse-test
gf-inverse-test
gf-vect-dot-prod-avx-test
gf-vect-dot-prod-base-test
gf-vect-dot-prod-sse-test
gf-vect-dot-prod-test
gf-vect-mul-avx-test
gf-vect-mul-base-test
gf-vect-mul-sse-test
gf-vect-mul-test

View File

@@ -30,7 +30,7 @@
units = src
default: slib
default: lib
include $(foreach unit,$(units), $(unit)/Makefile)

View File

@@ -41,7 +41,7 @@
extern "C" {
#endif
#ifndef __unix__
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
@@ -59,7 +59,7 @@ typedef unsigned char UINT8;
#endif
#ifdef __unix__
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else

View File

@@ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64
CFLAGS_icl = -Qstd=c99
ARFLAGS_win64 = -out:$@
# arch=osx build options
# The assembler is asked for the macho64 output format and the archiver is
# invoked with "-r <target>".
ASFLAGS_osx = -f macho64
ARFLAGS_osx = -r $@
# NOTE(review): STRIP_gcc carries a compiler suffix, not an arch suffix, so
# this assignment presumably applies to every gcc build rather than only to
# arch=osx -- confirm that is intended.
STRIP_gcc = strip -d $@
# arch=mingw build options
ASFLAGS_mingw = -f win64
ARFLAGS_mingw = cr $@
@@ -101,9 +106,13 @@ ifeq ($(arch),win64)
lib_name := $(basename $(lib_name)).lib
endif
# Per-arch aliases of the generic source and test lists. all_tests (below)
# expands $(perf_tests$(arch)), $(unit_tests$(arch)) and $(examples$(arch)),
# so each supported arch suffix (win64, osx) needs its own copy.
lsrcwin64 = $(lsrc)
lsrcosx = $(lsrc)
unit_testswin64 = $(unit_tests)
unit_testsosx = $(unit_tests)
exampleswin64 = $(examples)
examplesosx = $(examples)
perf_testswin64 = $(perf_tests)
perf_testsosx = $(perf_tests)
# Build and run unit tests, performance tests, etc.
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
@@ -199,7 +208,6 @@ perf_report:
@echo Summary:
-grep runtime $(rpt_name)
clean:
@echo Cleaning up
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)

View File

@@ -33,6 +33,42 @@
%define WRT_OPT
%endif
; Symbol-name mapping for the dispatcher and the routines it selects between.
; When assembling for macho64 every name gets a leading underscore; for any
; other output format the plain name is used. The extern/global declarations
; and the lea instructions further down refer to these macros instead of the
; literal names.
; NOTE(review): presumably the underscore matches how OS X toolchains spell C
; symbols -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE _gf_vect_mul_base
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
%define EC_ENCODE_DATA _ec_encode_data
%define GF_VECT_MUL _gf_vect_mul
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
%else
; Non-macho64 formats: plain names.
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE gf_vect_mul_base
%define EC_ENCODE_DATA_BASE ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
%define EC_ENCODE_DATA ec_encode_data
%define GF_VECT_MUL gf_vect_mul
%define GF_VECT_DOT_PROD gf_vect_dot_prod
%endif
%ifidn __OUTPUT_FORMAT__, elf32
[bits 32]
@@ -51,19 +87,19 @@ default rel
%define wrd_sz qword
%define arg1 rsi
extern ec_encode_data_sse
extern ec_encode_data_avx
extern ec_encode_data_avx2
extern gf_vect_mul_sse
extern gf_vect_mul_avx
extern gf_vect_dot_prod_sse
extern gf_vect_dot_prod_avx
extern gf_vect_dot_prod_avx2
extern EC_ENCODE_DATA_SSE
extern EC_ENCODE_DATA_AVX
extern EC_ENCODE_DATA_AVX2
extern GF_VECT_MUL_SSE
extern GF_VECT_MUL_AVX
extern GF_VECT_DOT_PROD_SSE
extern GF_VECT_DOT_PROD_AVX
extern GF_VECT_DOT_PROD_AVX2
%endif
extern gf_vect_mul_base
extern ec_encode_data_base
extern gf_vect_dot_prod_base
extern GF_VECT_MUL_BASE
extern EC_ENCODE_DATA_BASE
extern GF_VECT_DOT_PROD_BASE
section .data
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
@@ -82,33 +118,33 @@ section .text
;;;;
; ec_encode_data multibinary function
;;;;
global ec_encode_data:function
global EC_ENCODE_DATA:function
ec_encode_data_mbinit:
call ec_encode_data_dispatch_init
ec_encode_data:
EC_ENCODE_DATA:
jmp wrd_sz [ec_encode_data_dispatched]
ec_encode_data_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [ec_encode_data_base]
lea arg1, [EC_ENCODE_DATA_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [ec_encode_data_base WRT_OPT] ; Default
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [ec_encode_data_sse WRT_OPT]
lea rbx, [EC_ENCODE_DATA_SSE WRT_OPT] ; SSE candidate; the cmovne below picks it when SSE4.1 is reported
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [ec_encode_data_avx WRT_OPT]
lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
jne _done_ec_encode_data_init
mov rsi, rbx
@@ -118,7 +154,7 @@ ec_encode_data_dispatch_init:
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [ec_encode_data_avx2 WRT_OPT]
lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
@@ -127,7 +163,7 @@ ec_encode_data_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_init
lea rsi, [ec_encode_data_sse WRT_OPT]
lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
_done_ec_encode_data_init:
pop rdx
@@ -142,30 +178,30 @@ _done_ec_encode_data_init:
;;;;
; gf_vect_mul multibinary function
;;;;
global gf_vect_mul:function
global GF_VECT_MUL:function
gf_vect_mul_mbinit:
call gf_vect_mul_dispatch_init
gf_vect_mul:
GF_VECT_MUL:
jmp wrd_sz [gf_vect_mul_dispatched]
gf_vect_mul_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_mul_base]
lea arg1, [GF_VECT_MUL_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_2
lea rbx, [gf_vect_mul_sse WRT_OPT]
je _done_gf_vect_mul_dispatch_init
mov arg1, rbx
lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
je _done_gf_vect_mul_dispatch_init
mov arg1, rbx
;; Try for AVX
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
@@ -178,49 +214,49 @@ gf_vect_mul_dispatch_init:
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _done_gf_vect_mul_dispatch_init
lea arg1, [gf_vect_mul_avx WRT_OPT]
lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
_done_gf_vect_mul_dispatch_init:
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_mul_dispatched], arg1
pop arg1
ret
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_mul_dispatched], arg1
pop arg1
ret
;;;;
; gf_vect_dot_prod multibinary function
;;;;
global gf_vect_dot_prod:function
global GF_VECT_DOT_PROD:function
gf_vect_dot_prod_mbinit:
call gf_vect_dot_prod_dispatch_init
gf_vect_dot_prod:
GF_VECT_DOT_PROD:
jmp wrd_sz [gf_vect_dot_prod_dispatched]
gf_vect_dot_prod_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [gf_vect_dot_prod_base]
lea arg1, [GF_VECT_DOT_PROD_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [gf_vect_dot_prod_sse WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [gf_vect_dot_prod_avx WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
jne _done_gf_vect_dot_prod_init
mov rsi, rbx
@@ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init:
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT]
lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
@@ -238,8 +274,8 @@ gf_vect_dot_prod_dispatch_init:
xgetbv
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_dot_prod_init
lea rsi, [gf_vect_dot_prod_sse WRT_OPT]
je _done_gf_vect_dot_prod_init
lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
_done_gf_vect_dot_prod_init:
pop rdx
@@ -261,6 +297,6 @@ global %1_slver
%endmacro
;;; func core, ver, snum
slversion ec_encode_data, 00, 02, 0133
slversion gf_vect_mul, 00, 02, 0134
slversion gf_vect_dot_prod, 00, 01, 0138
slversion EC_ENCODE_DATA, 00, 02, 0133
slversion GF_VECT_MUL, 00, 02, 0134
slversion GF_VECT_DOT_PROD, 00, 01, 0138

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
%else
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -58,6 +63,31 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the one register
; marked "must be saved and restored".
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3 is the same register as arg5 (r9).
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -143,9 +173,8 @@ section .text
%define xp2 xmm3
align 16
global gf_2vect_dot_prod_avx:function
func(gf_2vect_dot_prod_avx)
global GF_2VECT_DOT_PROD_AVX:function
func(GF_2VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -231,4 +260,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx, 02, 03, 0191
slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
%else
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -60,6 +65,33 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the one register
; marked "must be saved and restored".
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
; 32-bit and 8-bit views of tmp (r11).
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
; tmp3 is the same register as arg5 (r9).
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -149,9 +181,8 @@ section .text
%define xp2 ymm3
align 16
global gf_2vect_dot_prod_avx2:function
func(gf_2vect_dot_prod_avx2)
global GF_2VECT_DOT_PROD_AVX2:function
func(GF_2VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@@ -243,4 +274,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
%else
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -58,6 +63,31 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the one register
; marked "must be saved and restored".
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
; tmp3 is the same register as arg5 (r9).
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -144,9 +174,8 @@ section .text
%define xp2 xmm3
align 16
global gf_2vect_dot_prod_sse:function
func(gf_2vect_dot_prod_sse)
global GF_2VECT_DOT_PROD_SSE:function
func(GF_2VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -233,4 +262,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_2vect_dot_prod_sse, 00, 02, 0062
slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
%else
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -60,6 +65,33 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the two registers
; marked "must be saved and restored" (r12, r13).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -157,8 +189,8 @@ section .text
%define xp3 xmm4
align 16
global gf_3vect_dot_prod_avx:function
func(gf_3vect_dot_prod_avx)
global GF_3VECT_DOT_PROD_AVX:function
func(GF_3VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -255,4 +287,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx, 02, 03, 0192
slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
%else
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -62,6 +67,35 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the two registers
; marked "must be saved and restored" (r12, r13).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
; 32-bit and 8-bit views of tmp (r11).
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -162,8 +196,8 @@ section .text
%define xp3 ymm4
align 16
global gf_3vect_dot_prod_avx2:function
func(gf_3vect_dot_prod_avx2)
global GF_3VECT_DOT_PROD_AVX2:function
func(GF_3VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@@ -268,4 +302,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197
slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
%else
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -60,6 +65,33 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the two registers
; marked "must be saved and restored" (r12, r13).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -157,8 +189,8 @@ section .text
%define xp3 xmm4
align 16
global gf_3vect_dot_prod_sse:function
func(gf_3vect_dot_prod_sse)
global GF_3VECT_DOT_PROD_SSE:function
func(GF_3VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -256,4 +288,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
%else
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -182,8 +220,8 @@ section .text
%define xp4 xmm5
align 16
global gf_4vect_dot_prod_avx:function
func(gf_4vect_dot_prod_avx)
global GF_4VECT_DOT_PROD_AVX:function
func(GF_4VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
%else
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -68,6 +73,41 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
; 32-bit and 8-bit views of tmp (r11).
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -187,8 +227,8 @@ section .text
%define xp4 ymm5
align 16
global gf_4vect_dot_prod_avx2:function
func(gf_4vect_dot_prod_avx2)
global GF_4VECT_DOT_PROD_AVX2:function
func(GF_4VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@@ -302,4 +342,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
%else
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -182,8 +220,8 @@ section .text
%define xp4 xmm5
align 16
global gf_4vect_dot_prod_sse:function
func(gf_4vect_dot_prod_sse)
global GF_4VECT_DOT_PROD_SSE:function
func(GF_4VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -293,4 +331,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
%else
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
%else
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -68,6 +73,41 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
; 32-bit and 8-bit views of tmp (r11).
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -189,8 +229,8 @@ section .text
%define xp5 ymm6
align 16
global gf_5vect_dot_prod_avx2:function
func(gf_5vect_dot_prod_avx2)
global GF_5VECT_DOT_PROD_AVX2:function
func(GF_5VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@@ -320,4 +360,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
%else
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -184,8 +222,8 @@ section .text
%define xp5 xmm6
align 16
global gf_5vect_dot_prod_sse:function
func(gf_5vect_dot_prod_sse)
global GF_5VECT_DOT_PROD_SSE:function
func(GF_5VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -309,4 +347,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
%else
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
; macho64 build: register names for the six arguments, the scratch registers
; and the return value, plus the save/restore macros for the four registers
; marked "must be saved and restored" (r12-r15).
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
; func(name) just emits the label "name:".
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
; Pops run in the reverse of the push order above.
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -182,8 +220,8 @@ section .text
%define xp6 xmm7
align 16
global gf_6vect_dot_prod_avx:function
func(gf_6vect_dot_prod_avx)
global GF_6VECT_DOT_PROD_AVX:function
func(GF_6VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Exported symbol name: macho64 objects use the underscore-prefixed spelling,
; every other output format keeps the plain name.
; NOTE(review): presumably matches the OS X C symbol convention -- confirm.
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
%else
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -68,6 +73,41 @@
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
 ; ---- macho64 register map and prologue/epilogue ----

 ; PS is 8 and LOG_PS is its base-2 logarithm
 ; (presumably the pointer size; the uses are outside this block).
 %define PS      8
 %define LOG_PS  3

 ; func(x) emits the label x; the return value travels in rax.
 %define func(x) x:
 %define return  rax

 ; Incoming arguments, in System V AMD64 integer-argument register order.
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 ; tmp is r11; tmp.w and tmp.b name its 32-bit and 8-bit sub-registers.
 %define tmp    r11
 %define tmp.w  r11d
 %define tmp.b  r11b
 ; Second temporary that FUNC_SAVE does not preserve.
 %define tmp2   r10

 ; Temporaries that must be saved and restored (handled by FUNC_SAVE /
 ; FUNC_RESTORE below).
 %define tmp4  r12
 %define tmp3  r13
 %define tmp5  r14
 %define tmp6  r15

 ; Prologue: push r12..r15 in ascending order.
 %macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
 %endmacro

 ; Epilogue: pop in the exact reverse order of FUNC_SAVE.
 %macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
%else
 %define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -66,6 +71,39 @@
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
 ; ---- macho64 register map and prologue/epilogue ----

 ; PS is 8 and LOG_PS is its base-2 logarithm
 ; (presumably the pointer size; the uses are outside this block).
 %define PS      8
 %define LOG_PS  3

 ; func(x) emits the label x; the return value travels in rax.
 %define func(x) x:
 %define return  rax

 ; Incoming arguments, in System V AMD64 integer-argument register order.
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 ; Temporaries that FUNC_SAVE does not preserve.
 %define tmp   r11
 %define tmp2  r10

 ; Temporaries that must be saved and restored (handled by FUNC_SAVE /
 ; FUNC_RESTORE below).
 %define tmp4  r12
 %define tmp3  r13
 %define tmp5  r14
 %define tmp6  r15

 ; Prologue: push r12..r15 in ascending order.
 %macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
 %endmacro

 ; Epilogue: pop in the exact reverse order of FUNC_SAVE.
 %macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -182,8 +220,8 @@ section .text
%define xp6 xmm7
align 16
global gf_6vect_dot_prod_sse:function
func(gf_6vect_dot_prod_sse)
global GF_6VECT_DOT_PROD_SSE:function
func(GF_6VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -320,4 +358,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_6vect_dot_prod_sse, 00, 03, 0066
slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%else
 %define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -51,6 +56,24 @@
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
 ; ---- macho64 register map ----

 ; PS is 8 (presumably the pointer size; the uses are outside this block).
 %define PS      8

 ; func(x) emits the label x; the return value travels in rax.
 %define func(x) x:
 %define return  rax

 ; Incoming arguments, in System V AMD64 integer-argument register order.
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 ; Temporaries. tmp3 is r9, the same register as arg5.
 ; NOTE(review): confirm arg5 is never live while tmp3 is in use.
 %define tmp   r11
 %define tmp2  r10
 %define tmp3  r9

 ; Prologue/epilogue hooks expand to nothing for this format.
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -121,8 +144,8 @@ section .text
%define xp xmm2
align 16
global gf_vect_dot_prod_avx:function
func(gf_vect_dot_prod_avx)
global GF_VECT_DOT_PROD_AVX:function
func(GF_VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -195,4 +218,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_avx, 02, 03, 0061
slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%else
 %define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -53,6 +58,26 @@
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
 ; ---- macho64 register map ----

 ; PS is 8 (presumably the pointer size; the uses are outside this block).
 %define PS      8

 ; func(x) emits the label x; the return value travels in rax.
 %define func(x) x:
 %define return  rax

 ; Incoming arguments, in System V AMD64 integer-argument register order.
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 ; tmp is r11; tmp.w and tmp.b name its 32-bit and 8-bit sub-registers.
 %define tmp    r11
 %define tmp.w  r11d
 %define tmp.b  r11b

 ; Further temporaries. tmp3 is r9, the same register as arg5.
 ; NOTE(review): confirm arg5 is never live while tmp3 is in use.
 %define tmp2   r10
 %define tmp3   r9

 ; Prologue/epilogue hooks expand to nothing for this format.
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -126,8 +151,8 @@ section .text
%define xp ymm2
align 16
global gf_vect_dot_prod_avx2:function
func(gf_vect_dot_prod_avx2)
global GF_VECT_DOT_PROD_AVX2:function
func(GF_VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
@@ -200,4 +225,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%else
 %define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -50,6 +55,23 @@
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
 ; ---- macho64 register map ----

 ; PS is 8 (presumably the pointer size; the uses are outside this block).
 %define PS      8

 ; func(x) emits the label x; the return value travels in rax.
 %define func(x) x:
 %define return  rax

 ; Incoming arguments arg0..arg4 (this block defines no arg5).
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8

 ; Temporaries.
 %define tmp   r11
 %define tmp2  r10
 %define tmp3  r9

 ; Prologue/epilogue hooks expand to nothing for this format.
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -121,8 +143,8 @@ section .text
%define xp xmm2
align 16
global gf_vect_dot_prod_sse:function
func(gf_vect_dot_prod_sse)
global GF_VECT_DOT_PROD_SSE:function
func(GF_VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
@@ -192,4 +214,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_dot_prod_sse, 00, 03, 0060
slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_VECT_MUL_AVX gf_vect_mul_avx
%else
 %define GF_VECT_MUL_AVX _gf_vect_mul_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -46,6 +51,19 @@
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
; macho64 arm of the output-format chain: register map for this routine.
; Incoming arguments, in System V AMD64 integer-argument register order.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Single temporary register.
%define tmp r11
; Register carrying the return value.
%define return rax
; func(x) emits the label x.
%define func(x) x:
; Prologue/epilogue hooks expand to nothing for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -111,8 +129,8 @@ section .text
%define xtmp2c xmm7
align 16
global gf_vect_mul_avx:function
func(gf_vect_mul_avx)
global GF_VECT_MUL_AVX:function
func(GF_VECT_MUL_AVX)
FUNC_SAVE
mov pos, 0
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@@ -169,4 +187,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_mul_avx, 01, 02, 0036
slversion GF_VECT_MUL_AVX, 01, 02, 0036

View File

@@ -2,7 +2,7 @@
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
@@ -32,6 +32,11 @@
;;;
;;; Author: Gregory Tucker
; Name of the exported entry point. For macho64 output the label gets a
; leading underscore; every other output format keeps the plain name.
%ifnidn __OUTPUT_FORMAT__, macho64
 %define GF_VECT_MUL_SSE gf_vect_mul_sse
%else
 %define GF_VECT_MUL_SSE _gf_vect_mul_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
@@ -46,6 +51,19 @@
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
; macho64 arm of the output-format chain: register map for this routine.
; Incoming arguments, in System V AMD64 integer-argument register order.
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
; Single temporary register.
%define tmp r11
; Register carrying the return value.
%define return rax
; func(x) emits the label x.
%define func(x) x:
; Prologue/epilogue hooks expand to nothing for this format.
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
@@ -112,8 +130,8 @@ section .text
align 16
global gf_vect_mul_sse:function
func(gf_vect_mul_sse)
global GF_VECT_MUL_SSE:function
func(GF_VECT_MUL_SSE)
FUNC_SAVE
mov pos, 0
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
@@ -175,4 +193,4 @@ global %1_slver
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_vect_mul_sse, 00, 02, 0034
slversion GF_VECT_MUL_SSE, 00, 02, 0034