mirror of https://github.com/minio/minio.git
Merge pull request #440 from fkautz/pr_out_removing_godeps_from_pkg_api
This commit is contained in:
commit
2abb4bba3a
|
@ -1,41 +0,0 @@
|
|||
{
|
||||
"ImportPath": "github.com/minio-io/minio/pkg/api",
|
||||
"GoVersion": "go1.4",
|
||||
"Packages": [
|
||||
"./..."
|
||||
],
|
||||
"Deps": [
|
||||
{
|
||||
"ImportPath": "github.com/gorilla/context",
|
||||
"Rev": "50c25fb3b2b3b3cc724e9b6ac75fb44b3bccd0da"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/gorilla/mux",
|
||||
"Rev": "e444e69cbd2e2e3e0749a2f3c717cec491552bbf"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/minio-io/erasure",
|
||||
"Rev": "3cece1a107115563682604b1430418e28f65dd80"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/minio-io/iodine",
|
||||
"Rev": "55cc4d4256c68fbd6f0775f1a25e37e6a2f6457e"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/stretchr/objx",
|
||||
"Rev": "cbeaeb16a013161a98496fad62933b1d21786672"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/stretchr/testify/assert",
|
||||
"Rev": "e4ec8152c15fc46bd5056ce65997a07c7d415325"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/stretchr/testify/mock",
|
||||
"Rev": "e4ec8152c15fc46bd5056ce65997a07c7d415325"
|
||||
},
|
||||
{
|
||||
"ImportPath": "gopkg.in/check.v1",
|
||||
"Rev": "64131543e7896d5bcc6bd5a76287eb75ea96c673"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
This directory tree is generated automatically by godep.
|
||||
|
||||
Please do not edit.
|
||||
|
||||
See https://github.com/tools/godep for more information.
|
|
@ -1,2 +0,0 @@
|
|||
/pkg
|
||||
/bin
|
|
@ -1,7 +0,0 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.0
|
||||
- 1.1
|
||||
- 1.2
|
||||
- tip
|
|
@ -1,27 +0,0 @@
|
|||
Copyright (c) 2012 Rodrigo Moraes. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,7 +0,0 @@
|
|||
context
|
||||
=======
|
||||
[![Build Status](https://travis-ci.org/gorilla/context.png?branch=master)](https://travis-ci.org/gorilla/context)
|
||||
|
||||
gorilla/context is a general purpose registry for global request variables.
|
||||
|
||||
Read the full documentation here: http://www.gorillatoolkit.org/pkg/context
|
|
@ -1,143 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package context
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
mutex sync.RWMutex
|
||||
data = make(map[*http.Request]map[interface{}]interface{})
|
||||
datat = make(map[*http.Request]int64)
|
||||
)
|
||||
|
||||
// Set stores a value for a given key in a given request.
|
||||
func Set(r *http.Request, key, val interface{}) {
|
||||
mutex.Lock()
|
||||
if data[r] == nil {
|
||||
data[r] = make(map[interface{}]interface{})
|
||||
datat[r] = time.Now().Unix()
|
||||
}
|
||||
data[r][key] = val
|
||||
mutex.Unlock()
|
||||
}
|
||||
|
||||
// Get returns a value stored for a given key in a given request.
|
||||
func Get(r *http.Request, key interface{}) interface{} {
|
||||
mutex.RLock()
|
||||
if ctx := data[r]; ctx != nil {
|
||||
value := ctx[key]
|
||||
mutex.RUnlock()
|
||||
return value
|
||||
}
|
||||
mutex.RUnlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetOk returns stored value and presence state like multi-value return of map access.
|
||||
func GetOk(r *http.Request, key interface{}) (interface{}, bool) {
|
||||
mutex.RLock()
|
||||
if _, ok := data[r]; ok {
|
||||
value, ok := data[r][key]
|
||||
mutex.RUnlock()
|
||||
return value, ok
|
||||
}
|
||||
mutex.RUnlock()
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// GetAll returns all stored values for the request as a map. Nil is returned for invalid requests.
|
||||
func GetAll(r *http.Request) map[interface{}]interface{} {
|
||||
mutex.RLock()
|
||||
if context, ok := data[r]; ok {
|
||||
result := make(map[interface{}]interface{}, len(context))
|
||||
for k, v := range context {
|
||||
result[k] = v
|
||||
}
|
||||
mutex.RUnlock()
|
||||
return result
|
||||
}
|
||||
mutex.RUnlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllOk returns all stored values for the request as a map and a boolean value that indicates if
|
||||
// the request was registered.
|
||||
func GetAllOk(r *http.Request) (map[interface{}]interface{}, bool) {
|
||||
mutex.RLock()
|
||||
context, ok := data[r]
|
||||
result := make(map[interface{}]interface{}, len(context))
|
||||
for k, v := range context {
|
||||
result[k] = v
|
||||
}
|
||||
mutex.RUnlock()
|
||||
return result, ok
|
||||
}
|
||||
|
||||
// Delete removes a value stored for a given key in a given request.
|
||||
func Delete(r *http.Request, key interface{}) {
|
||||
mutex.Lock()
|
||||
if data[r] != nil {
|
||||
delete(data[r], key)
|
||||
}
|
||||
mutex.Unlock()
|
||||
}
|
||||
|
||||
// Clear removes all values stored for a given request.
|
||||
//
|
||||
// This is usually called by a handler wrapper to clean up request
|
||||
// variables at the end of a request lifetime. See ClearHandler().
|
||||
func Clear(r *http.Request) {
|
||||
mutex.Lock()
|
||||
clear(r)
|
||||
mutex.Unlock()
|
||||
}
|
||||
|
||||
// clear is Clear without the lock.
|
||||
func clear(r *http.Request) {
|
||||
delete(data, r)
|
||||
delete(datat, r)
|
||||
}
|
||||
|
||||
// Purge removes request data stored for longer than maxAge, in seconds.
|
||||
// It returns the amount of requests removed.
|
||||
//
|
||||
// If maxAge <= 0, all request data is removed.
|
||||
//
|
||||
// This is only used for sanity check: in case context cleaning was not
|
||||
// properly set some request data can be kept forever, consuming an increasing
|
||||
// amount of memory. In case this is detected, Purge() must be called
|
||||
// periodically until the problem is fixed.
|
||||
func Purge(maxAge int) int {
|
||||
mutex.Lock()
|
||||
count := 0
|
||||
if maxAge <= 0 {
|
||||
count = len(data)
|
||||
data = make(map[*http.Request]map[interface{}]interface{})
|
||||
datat = make(map[*http.Request]int64)
|
||||
} else {
|
||||
min := time.Now().Unix() - int64(maxAge)
|
||||
for r := range data {
|
||||
if datat[r] < min {
|
||||
clear(r)
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
mutex.Unlock()
|
||||
return count
|
||||
}
|
||||
|
||||
// ClearHandler wraps an http.Handler and clears request values at the end
|
||||
// of a request lifetime.
|
||||
func ClearHandler(h http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
defer Clear(r)
|
||||
h.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
|
@ -1,161 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package context
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type keyType int
|
||||
|
||||
const (
|
||||
key1 keyType = iota
|
||||
key2
|
||||
)
|
||||
|
||||
func TestContext(t *testing.T) {
|
||||
assertEqual := func(val interface{}, exp interface{}) {
|
||||
if val != exp {
|
||||
t.Errorf("Expected %v, got %v.", exp, val)
|
||||
}
|
||||
}
|
||||
|
||||
r, _ := http.NewRequest("GET", "http://localhost:8080/", nil)
|
||||
emptyR, _ := http.NewRequest("GET", "http://localhost:8080/", nil)
|
||||
|
||||
// Get()
|
||||
assertEqual(Get(r, key1), nil)
|
||||
|
||||
// Set()
|
||||
Set(r, key1, "1")
|
||||
assertEqual(Get(r, key1), "1")
|
||||
assertEqual(len(data[r]), 1)
|
||||
|
||||
Set(r, key2, "2")
|
||||
assertEqual(Get(r, key2), "2")
|
||||
assertEqual(len(data[r]), 2)
|
||||
|
||||
//GetOk
|
||||
value, ok := GetOk(r, key1)
|
||||
assertEqual(value, "1")
|
||||
assertEqual(ok, true)
|
||||
|
||||
value, ok = GetOk(r, "not exists")
|
||||
assertEqual(value, nil)
|
||||
assertEqual(ok, false)
|
||||
|
||||
Set(r, "nil value", nil)
|
||||
value, ok = GetOk(r, "nil value")
|
||||
assertEqual(value, nil)
|
||||
assertEqual(ok, true)
|
||||
|
||||
// GetAll()
|
||||
values := GetAll(r)
|
||||
assertEqual(len(values), 3)
|
||||
|
||||
// GetAll() for empty request
|
||||
values = GetAll(emptyR)
|
||||
if values != nil {
|
||||
t.Error("GetAll didn't return nil value for invalid request")
|
||||
}
|
||||
|
||||
// GetAllOk()
|
||||
values, ok = GetAllOk(r)
|
||||
assertEqual(len(values), 3)
|
||||
assertEqual(ok, true)
|
||||
|
||||
// GetAllOk() for empty request
|
||||
values, ok = GetAllOk(emptyR)
|
||||
assertEqual(value, nil)
|
||||
assertEqual(ok, false)
|
||||
|
||||
// Delete()
|
||||
Delete(r, key1)
|
||||
assertEqual(Get(r, key1), nil)
|
||||
assertEqual(len(data[r]), 2)
|
||||
|
||||
Delete(r, key2)
|
||||
assertEqual(Get(r, key2), nil)
|
||||
assertEqual(len(data[r]), 1)
|
||||
|
||||
// Clear()
|
||||
Clear(r)
|
||||
assertEqual(len(data), 0)
|
||||
}
|
||||
|
||||
func parallelReader(r *http.Request, key string, iterations int, wait, done chan struct{}) {
|
||||
<-wait
|
||||
for i := 0; i < iterations; i++ {
|
||||
Get(r, key)
|
||||
}
|
||||
done <- struct{}{}
|
||||
|
||||
}
|
||||
|
||||
func parallelWriter(r *http.Request, key, value string, iterations int, wait, done chan struct{}) {
|
||||
<-wait
|
||||
for i := 0; i < iterations; i++ {
|
||||
Set(r, key, value)
|
||||
}
|
||||
done <- struct{}{}
|
||||
|
||||
}
|
||||
|
||||
func benchmarkMutex(b *testing.B, numReaders, numWriters, iterations int) {
|
||||
|
||||
b.StopTimer()
|
||||
r, _ := http.NewRequest("GET", "http://localhost:8080/", nil)
|
||||
done := make(chan struct{})
|
||||
b.StartTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
wait := make(chan struct{})
|
||||
|
||||
for i := 0; i < numReaders; i++ {
|
||||
go parallelReader(r, "test", iterations, wait, done)
|
||||
}
|
||||
|
||||
for i := 0; i < numWriters; i++ {
|
||||
go parallelWriter(r, "test", "123", iterations, wait, done)
|
||||
}
|
||||
|
||||
close(wait)
|
||||
|
||||
for i := 0; i < numReaders+numWriters; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkMutexSameReadWrite1(b *testing.B) {
|
||||
benchmarkMutex(b, 1, 1, 32)
|
||||
}
|
||||
func BenchmarkMutexSameReadWrite2(b *testing.B) {
|
||||
benchmarkMutex(b, 2, 2, 32)
|
||||
}
|
||||
func BenchmarkMutexSameReadWrite4(b *testing.B) {
|
||||
benchmarkMutex(b, 4, 4, 32)
|
||||
}
|
||||
func BenchmarkMutex1(b *testing.B) {
|
||||
benchmarkMutex(b, 2, 8, 32)
|
||||
}
|
||||
func BenchmarkMutex2(b *testing.B) {
|
||||
benchmarkMutex(b, 16, 4, 64)
|
||||
}
|
||||
func BenchmarkMutex3(b *testing.B) {
|
||||
benchmarkMutex(b, 1, 2, 128)
|
||||
}
|
||||
func BenchmarkMutex4(b *testing.B) {
|
||||
benchmarkMutex(b, 128, 32, 256)
|
||||
}
|
||||
func BenchmarkMutex5(b *testing.B) {
|
||||
benchmarkMutex(b, 1024, 2048, 64)
|
||||
}
|
||||
func BenchmarkMutex6(b *testing.B) {
|
||||
benchmarkMutex(b, 2048, 1024, 512)
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package context stores values shared during a request lifetime.
|
||||
|
||||
For example, a router can set variables extracted from the URL and later
|
||||
application handlers can access those values, or it can be used to store
|
||||
sessions values to be saved at the end of a request. There are several
|
||||
others common uses.
|
||||
|
||||
The idea was posted by Brad Fitzpatrick to the go-nuts mailing list:
|
||||
|
||||
http://groups.google.com/group/golang-nuts/msg/e2d679d303aa5d53
|
||||
|
||||
Here's the basic usage: first define the keys that you will need. The key
|
||||
type is interface{} so a key can be of any type that supports equality.
|
||||
Here we define a key using a custom int type to avoid name collisions:
|
||||
|
||||
package foo
|
||||
|
||||
import (
|
||||
"github.com/gorilla/context"
|
||||
)
|
||||
|
||||
type key int
|
||||
|
||||
const MyKey key = 0
|
||||
|
||||
Then set a variable. Variables are bound to an http.Request object, so you
|
||||
need a request instance to set a value:
|
||||
|
||||
context.Set(r, MyKey, "bar")
|
||||
|
||||
The application can later access the variable using the same key you provided:
|
||||
|
||||
func MyHandler(w http.ResponseWriter, r *http.Request) {
|
||||
// val is "bar".
|
||||
val := context.Get(r, foo.MyKey)
|
||||
|
||||
// returns ("bar", true)
|
||||
val, ok := context.GetOk(r, foo.MyKey)
|
||||
// ...
|
||||
}
|
||||
|
||||
And that's all about the basic usage. We discuss some other ideas below.
|
||||
|
||||
Any type can be stored in the context. To enforce a given type, make the key
|
||||
private and wrap Get() and Set() to accept and return values of a specific
|
||||
type:
|
||||
|
||||
type key int
|
||||
|
||||
const mykey key = 0
|
||||
|
||||
// GetMyKey returns a value for this package from the request values.
|
||||
func GetMyKey(r *http.Request) SomeType {
|
||||
if rv := context.Get(r, mykey); rv != nil {
|
||||
return rv.(SomeType)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetMyKey sets a value for this package in the request values.
|
||||
func SetMyKey(r *http.Request, val SomeType) {
|
||||
context.Set(r, mykey, val)
|
||||
}
|
||||
|
||||
Variables must be cleared at the end of a request, to remove all values
|
||||
that were stored. This can be done in an http.Handler, after a request was
|
||||
served. Just call Clear() passing the request:
|
||||
|
||||
context.Clear(r)
|
||||
|
||||
...or use ClearHandler(), which conveniently wraps an http.Handler to clear
|
||||
variables at the end of a request lifetime.
|
||||
|
||||
The Routers from the packages gorilla/mux and gorilla/pat call Clear()
|
||||
so if you are using either of them you don't need to clear the context manually.
|
||||
*/
|
||||
package context
|
|
@ -1,7 +0,0 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.0
|
||||
- 1.1
|
||||
- 1.2
|
||||
- tip
|
|
@ -1,27 +0,0 @@
|
|||
Copyright (c) 2012 Rodrigo Moraes. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,7 +0,0 @@
|
|||
mux
|
||||
===
|
||||
[![Build Status](https://travis-ci.org/gorilla/mux.png?branch=master)](https://travis-ci.org/gorilla/mux)
|
||||
|
||||
gorilla/mux is a powerful URL router and dispatcher.
|
||||
|
||||
Read the full documentation here: http://www.gorillatoolkit.org/pkg/mux
|
|
@ -1,21 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkMux(b *testing.B) {
|
||||
router := new(Router)
|
||||
handler := func(w http.ResponseWriter, r *http.Request) {}
|
||||
router.HandleFunc("/v1/{v1}", handler)
|
||||
|
||||
request, _ := http.NewRequest("GET", "/v1/anything", nil)
|
||||
for i := 0; i < b.N; i++ {
|
||||
router.ServeHTTP(nil, request)
|
||||
}
|
||||
}
|
|
@ -1,199 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package gorilla/mux implements a request router and dispatcher.
|
||||
|
||||
The name mux stands for "HTTP request multiplexer". Like the standard
|
||||
http.ServeMux, mux.Router matches incoming requests against a list of
|
||||
registered routes and calls a handler for the route that matches the URL
|
||||
or other conditions. The main features are:
|
||||
|
||||
* Requests can be matched based on URL host, path, path prefix, schemes,
|
||||
header and query values, HTTP methods or using custom matchers.
|
||||
* URL hosts and paths can have variables with an optional regular
|
||||
expression.
|
||||
* Registered URLs can be built, or "reversed", which helps maintaining
|
||||
references to resources.
|
||||
* Routes can be used as subrouters: nested routes are only tested if the
|
||||
parent route matches. This is useful to define groups of routes that
|
||||
share common conditions like a host, a path prefix or other repeated
|
||||
attributes. As a bonus, this optimizes request matching.
|
||||
* It implements the http.Handler interface so it is compatible with the
|
||||
standard http.ServeMux.
|
||||
|
||||
Let's start registering a couple of URL paths and handlers:
|
||||
|
||||
func main() {
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/", HomeHandler)
|
||||
r.HandleFunc("/products", ProductsHandler)
|
||||
r.HandleFunc("/articles", ArticlesHandler)
|
||||
http.Handle("/", r)
|
||||
}
|
||||
|
||||
Here we register three routes mapping URL paths to handlers. This is
|
||||
equivalent to how http.HandleFunc() works: if an incoming request URL matches
|
||||
one of the paths, the corresponding handler is called passing
|
||||
(http.ResponseWriter, *http.Request) as parameters.
|
||||
|
||||
Paths can have variables. They are defined using the format {name} or
|
||||
{name:pattern}. If a regular expression pattern is not defined, the matched
|
||||
variable will be anything until the next slash. For example:
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/products/{key}", ProductHandler)
|
||||
r.HandleFunc("/articles/{category}/", ArticlesCategoryHandler)
|
||||
r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler)
|
||||
|
||||
The names are used to create a map of route variables which can be retrieved
|
||||
calling mux.Vars():
|
||||
|
||||
vars := mux.Vars(request)
|
||||
category := vars["category"]
|
||||
|
||||
And this is all you need to know about the basic usage. More advanced options
|
||||
are explained below.
|
||||
|
||||
Routes can also be restricted to a domain or subdomain. Just define a host
|
||||
pattern to be matched. They can also have variables:
|
||||
|
||||
r := mux.NewRouter()
|
||||
// Only matches if domain is "www.domain.com".
|
||||
r.Host("www.domain.com")
|
||||
// Matches a dynamic subdomain.
|
||||
r.Host("{subdomain:[a-z]+}.domain.com")
|
||||
|
||||
There are several other matchers that can be added. To match path prefixes:
|
||||
|
||||
r.PathPrefix("/products/")
|
||||
|
||||
...or HTTP methods:
|
||||
|
||||
r.Methods("GET", "POST")
|
||||
|
||||
...or URL schemes:
|
||||
|
||||
r.Schemes("https")
|
||||
|
||||
...or header values:
|
||||
|
||||
r.Headers("X-Requested-With", "XMLHttpRequest")
|
||||
|
||||
...or query values:
|
||||
|
||||
r.Queries("key", "value")
|
||||
|
||||
...or to use a custom matcher function:
|
||||
|
||||
r.MatcherFunc(func(r *http.Request, rm *RouteMatch) bool {
|
||||
return r.ProtoMajor == 0
|
||||
})
|
||||
|
||||
...and finally, it is possible to combine several matchers in a single route:
|
||||
|
||||
r.HandleFunc("/products", ProductsHandler).
|
||||
Host("www.domain.com").
|
||||
Methods("GET").
|
||||
Schemes("http")
|
||||
|
||||
Setting the same matching conditions again and again can be boring, so we have
|
||||
a way to group several routes that share the same requirements.
|
||||
We call it "subrouting".
|
||||
|
||||
For example, let's say we have several URLs that should only match when the
|
||||
host is "www.domain.com". Create a route for that host and get a "subrouter"
|
||||
from it:
|
||||
|
||||
r := mux.NewRouter()
|
||||
s := r.Host("www.domain.com").Subrouter()
|
||||
|
||||
Then register routes in the subrouter:
|
||||
|
||||
s.HandleFunc("/products/", ProductsHandler)
|
||||
s.HandleFunc("/products/{key}", ProductHandler)
|
||||
s.HandleFunc("/articles/{category}/{id:[0-9]+}"), ArticleHandler)
|
||||
|
||||
The three URL paths we registered above will only be tested if the domain is
|
||||
"www.domain.com", because the subrouter is tested first. This is not
|
||||
only convenient, but also optimizes request matching. You can create
|
||||
subrouters combining any attribute matchers accepted by a route.
|
||||
|
||||
Subrouters can be used to create domain or path "namespaces": you define
|
||||
subrouters in a central place and then parts of the app can register its
|
||||
paths relatively to a given subrouter.
|
||||
|
||||
There's one more thing about subroutes. When a subrouter has a path prefix,
|
||||
the inner routes use it as base for their paths:
|
||||
|
||||
r := mux.NewRouter()
|
||||
s := r.PathPrefix("/products").Subrouter()
|
||||
// "/products/"
|
||||
s.HandleFunc("/", ProductsHandler)
|
||||
// "/products/{key}/"
|
||||
s.HandleFunc("/{key}/", ProductHandler)
|
||||
// "/products/{key}/details"
|
||||
s.HandleFunc("/{key}/details", ProductDetailsHandler)
|
||||
|
||||
Now let's see how to build registered URLs.
|
||||
|
||||
Routes can be named. All routes that define a name can have their URLs built,
|
||||
or "reversed". We define a name calling Name() on a route. For example:
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
|
||||
Name("article")
|
||||
|
||||
To build a URL, get the route and call the URL() method, passing a sequence of
|
||||
key/value pairs for the route variables. For the previous route, we would do:
|
||||
|
||||
url, err := r.Get("article").URL("category", "technology", "id", "42")
|
||||
|
||||
...and the result will be a url.URL with the following path:
|
||||
|
||||
"/articles/technology/42"
|
||||
|
||||
This also works for host variables:
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.Host("{subdomain}.domain.com").
|
||||
Path("/articles/{category}/{id:[0-9]+}").
|
||||
HandlerFunc(ArticleHandler).
|
||||
Name("article")
|
||||
|
||||
// url.String() will be "http://news.domain.com/articles/technology/42"
|
||||
url, err := r.Get("article").URL("subdomain", "news",
|
||||
"category", "technology",
|
||||
"id", "42")
|
||||
|
||||
All variables defined in the route are required, and their values must
|
||||
conform to the corresponding patterns. These requirements guarantee that a
|
||||
generated URL will always match a registered route -- the only exception is
|
||||
for explicitly defined "build-only" routes which never match.
|
||||
|
||||
There's also a way to build only the URL host or path for a route:
|
||||
use the methods URLHost() or URLPath() instead. For the previous route,
|
||||
we would do:
|
||||
|
||||
// "http://news.domain.com/"
|
||||
host, err := r.Get("article").URLHost("subdomain", "news")
|
||||
|
||||
// "/articles/technology/42"
|
||||
path, err := r.Get("article").URLPath("category", "technology", "id", "42")
|
||||
|
||||
And if you use subrouters, host and path defined separately can be built
|
||||
as well:
|
||||
|
||||
r := mux.NewRouter()
|
||||
s := r.Host("{subdomain}.domain.com").Subrouter()
|
||||
s.Path("/articles/{category}/{id:[0-9]+}").
|
||||
HandlerFunc(ArticleHandler).
|
||||
Name("article")
|
||||
|
||||
// "http://news.domain.com/articles/technology/42"
|
||||
url, err := r.Get("article").URL("subdomain", "news",
|
||||
"category", "technology",
|
||||
"id", "42")
|
||||
*/
|
||||
package mux
|
|
@ -1,353 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path"
|
||||
|
||||
"github.com/gorilla/context"
|
||||
)
|
||||
|
||||
// NewRouter returns a new router instance.
|
||||
func NewRouter() *Router {
|
||||
return &Router{namedRoutes: make(map[string]*Route), KeepContext: false}
|
||||
}
|
||||
|
||||
// Router registers routes to be matched and dispatches a handler.
|
||||
//
|
||||
// It implements the http.Handler interface, so it can be registered to serve
|
||||
// requests:
|
||||
//
|
||||
// var router = mux.NewRouter()
|
||||
//
|
||||
// func main() {
|
||||
// http.Handle("/", router)
|
||||
// }
|
||||
//
|
||||
// Or, for Google App Engine, register it in a init() function:
|
||||
//
|
||||
// func init() {
|
||||
// http.Handle("/", router)
|
||||
// }
|
||||
//
|
||||
// This will send all incoming requests to the router.
|
||||
type Router struct {
|
||||
// Configurable Handler to be used when no route matches.
|
||||
NotFoundHandler http.Handler
|
||||
// Parent route, if this is a subrouter.
|
||||
parent parentRoute
|
||||
// Routes to be matched, in order.
|
||||
routes []*Route
|
||||
// Routes by name for URL building.
|
||||
namedRoutes map[string]*Route
|
||||
// See Router.StrictSlash(). This defines the flag for new routes.
|
||||
strictSlash bool
|
||||
// If true, do not clear the request context after handling the request
|
||||
KeepContext bool
|
||||
}
|
||||
|
||||
// Match matches registered routes against the request.
|
||||
func (r *Router) Match(req *http.Request, match *RouteMatch) bool {
|
||||
for _, route := range r.routes {
|
||||
if route.Match(req, match) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ServeHTTP dispatches the handler registered in the matched route.
|
||||
//
|
||||
// When there is a match, the route variables can be retrieved calling
|
||||
// mux.Vars(request).
|
||||
func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
// Clean path to canonical form and redirect.
|
||||
if p := cleanPath(req.URL.Path); p != req.URL.Path {
|
||||
|
||||
// Added 3 lines (Philip Schlump) - It was droping the query string and #whatever from query.
|
||||
// This matches with fix in go 1.2 r.c. 4 for same problem. Go Issue:
|
||||
// http://code.google.com/p/go/issues/detail?id=5252
|
||||
url := *req.URL
|
||||
url.Path = p
|
||||
p = url.String()
|
||||
|
||||
w.Header().Set("Location", p)
|
||||
w.WriteHeader(http.StatusMovedPermanently)
|
||||
return
|
||||
}
|
||||
var match RouteMatch
|
||||
var handler http.Handler
|
||||
if r.Match(req, &match) {
|
||||
handler = match.Handler
|
||||
setVars(req, match.Vars)
|
||||
setCurrentRoute(req, match.Route)
|
||||
}
|
||||
if handler == nil {
|
||||
handler = r.NotFoundHandler
|
||||
if handler == nil {
|
||||
handler = http.NotFoundHandler()
|
||||
}
|
||||
}
|
||||
if !r.KeepContext {
|
||||
defer context.Clear(req)
|
||||
}
|
||||
handler.ServeHTTP(w, req)
|
||||
}
|
||||
|
||||
// Get returns a route registered with the given name.
|
||||
func (r *Router) Get(name string) *Route {
|
||||
return r.getNamedRoutes()[name]
|
||||
}
|
||||
|
||||
// GetRoute returns a route registered with the given name. This method
|
||||
// was renamed to Get() and remains here for backwards compatibility.
|
||||
func (r *Router) GetRoute(name string) *Route {
|
||||
return r.getNamedRoutes()[name]
|
||||
}
|
||||
|
||||
// StrictSlash defines the trailing slash behavior for new routes. The initial
|
||||
// value is false.
|
||||
//
|
||||
// When true, if the route path is "/path/", accessing "/path" will redirect
|
||||
// to the former and vice versa. In other words, your application will always
|
||||
// see the path as specified in the route.
|
||||
//
|
||||
// When false, if the route path is "/path", accessing "/path/" will not match
|
||||
// this route and vice versa.
|
||||
//
|
||||
// Special case: when a route sets a path prefix using the PathPrefix() method,
|
||||
// strict slash is ignored for that route because the redirect behavior can't
|
||||
// be determined from a prefix alone. However, any subrouters created from that
|
||||
// route inherit the original StrictSlash setting.
|
||||
func (r *Router) StrictSlash(value bool) *Router {
|
||||
r.strictSlash = value
|
||||
return r
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// parentRoute
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// getNamedRoutes returns the map where named routes are registered.
|
||||
func (r *Router) getNamedRoutes() map[string]*Route {
|
||||
if r.namedRoutes == nil {
|
||||
if r.parent != nil {
|
||||
r.namedRoutes = r.parent.getNamedRoutes()
|
||||
} else {
|
||||
r.namedRoutes = make(map[string]*Route)
|
||||
}
|
||||
}
|
||||
return r.namedRoutes
|
||||
}
|
||||
|
||||
// getRegexpGroup returns regexp definitions from the parent route, if any.
|
||||
func (r *Router) getRegexpGroup() *routeRegexpGroup {
|
||||
if r.parent != nil {
|
||||
return r.parent.getRegexpGroup()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Route factories
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// NewRoute registers an empty route.
// The route inherits the router's StrictSlash setting and is appended to
// the router's route list so it participates in matching.
func (r *Router) NewRoute() *Route {
	route := &Route{parent: r, strictSlash: r.strictSlash}
	r.routes = append(r.routes, route)
	return route
}
|
||||
|
||||
// Handle registers a new route with a matcher for the URL path and
// assigns it the given handler.
// See Route.Path() and Route.Handler().
func (r *Router) Handle(path string, handler http.Handler) *Route {
	return r.NewRoute().Path(path).Handler(handler)
}
|
||||
|
||||
// HandleFunc registers a new route with a matcher for the URL path and
// assigns it the given handler function.
// See Route.Path() and Route.HandlerFunc().
func (r *Router) HandleFunc(path string, f func(http.ResponseWriter,
	*http.Request)) *Route {
	return r.NewRoute().Path(path).HandlerFunc(f)
}
|
||||
|
||||
// Headers registers a new route with a matcher for request header values.
// Pairs are interpreted as key/value alternations.
// See Route.Headers().
func (r *Router) Headers(pairs ...string) *Route {
	return r.NewRoute().Headers(pairs...)
}
|
||||
|
||||
// Host registers a new route with a matcher for the URL host, given as a
// template that may contain {name} or {name:pattern} variables.
// See Route.Host().
func (r *Router) Host(tpl string) *Route {
	return r.NewRoute().Host(tpl)
}
|
||||
|
||||
// MatcherFunc registers a new route with a custom matcher function.
// See Route.MatcherFunc().
func (r *Router) MatcherFunc(f MatcherFunc) *Route {
	return r.NewRoute().MatcherFunc(f)
}
|
||||
|
||||
// Methods registers a new route with a matcher for HTTP methods
// (e.g. "GET", "POST").
// See Route.Methods().
func (r *Router) Methods(methods ...string) *Route {
	return r.NewRoute().Methods(methods...)
}
|
||||
|
||||
// Path registers a new route with a matcher for the URL path, given as a
// template that may contain {name} or {name:pattern} variables.
// See Route.Path().
func (r *Router) Path(tpl string) *Route {
	return r.NewRoute().Path(tpl)
}
|
||||
|
||||
// PathPrefix registers a new route with a matcher for the URL path prefix.
// See Route.PathPrefix().
func (r *Router) PathPrefix(tpl string) *Route {
	return r.NewRoute().PathPrefix(tpl)
}
|
||||
|
||||
// Queries registers a new route with a matcher for URL query values.
// Pairs are interpreted as key/value alternations.
// See Route.Queries().
func (r *Router) Queries(pairs ...string) *Route {
	return r.NewRoute().Queries(pairs...)
}
|
||||
|
||||
// Schemes registers a new route with a matcher for URL schemes
// (e.g. "http", "https").
// See Route.Schemes().
func (r *Router) Schemes(schemes ...string) *Route {
	return r.NewRoute().Schemes(schemes...)
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Context
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// RouteMatch stores information about a matched route.
type RouteMatch struct {
	Route   *Route            // the route that matched the request
	Handler http.Handler      // the handler to invoke for the request
	Vars    map[string]string // variables extracted from the URL templates
}
|
||||
|
||||
// contextKey is a private key type used to store per-request values in
// the gorilla/context registry without colliding with other packages.
type contextKey int

const (
	varsKey  contextKey = iota // key under which route variables are stored
	routeKey                   // key under which the matched *Route is stored
)
|
||||
|
||||
// Vars returns the route variables for the current request, if any.
|
||||
func Vars(r *http.Request) map[string]string {
|
||||
if rv := context.Get(r, varsKey); rv != nil {
|
||||
return rv.(map[string]string)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CurrentRoute returns the matched route for the current request, if any.
|
||||
func CurrentRoute(r *http.Request) *Route {
|
||||
if rv := context.Get(r, routeKey); rv != nil {
|
||||
return rv.(*Route)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setVars stores the matched route variables for r in the request context.
func setVars(r *http.Request, val interface{}) {
	context.Set(r, varsKey, val)
}
|
||||
|
||||
// setCurrentRoute stores the matched route for r in the request context.
func setCurrentRoute(r *http.Request, val interface{}) {
	context.Set(r, routeKey, val)
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// cleanPath returns the canonical path for p, eliminating . and .. elements.
// Borrowed from the net/http package.
func cleanPath(p string) string {
	if len(p) == 0 {
		return "/"
	}
	// Ensure the path is rooted before cleaning.
	if p[0] != '/' {
		p = "/" + p
	}
	np := path.Clean(p)
	// path.Clean strips a trailing slash (except for the root);
	// restore it when the input ended with one.
	if np != "/" && p[len(p)-1] == '/' {
		np += "/"
	}
	return np
}
|
||||
|
||||
// uniqueVars returns an error if two slices contain duplicated strings.
// The first duplicate found (scanning s1 in order, then s2) is reported.
func uniqueVars(s1, s2 []string) error {
	for i := range s1 {
		for j := range s2 {
			if s1[i] != s2[j] {
				continue
			}
			return fmt.Errorf("mux: duplicated route variable %q", s2[j])
		}
	}
	return nil
}
|
||||
|
||||
// mapFromPairs converts variadic string parameters to a string map.
// Arguments are interpreted as alternating key/value pairs; an odd
// number of arguments is an error.
func mapFromPairs(pairs ...string) (map[string]string, error) {
	if len(pairs)%2 != 0 {
		return nil, fmt.Errorf(
			"mux: number of parameters must be multiple of 2, got %v", pairs)
	}
	m := make(map[string]string, len(pairs)/2)
	for i := 1; i < len(pairs); i += 2 {
		m[pairs[i-1]] = pairs[i]
	}
	return m, nil
}
|
||||
|
||||
// matchInArray returns true if the given string value is in the array.
func matchInArray(arr []string, value string) bool {
	for i := range arr {
		if arr[i] == value {
			return true
		}
	}
	return false
}
|
||||
|
||||
// matchMap returns true if the given key/value pairs exist in a given map.
// When canonicalKey is true, keys from toCheck are canonicalized with
// http.CanonicalHeaderKey before the lookup (header-style matching).
// An empty expected value only requires the key to be present.
func matchMap(toCheck map[string]string, toMatch map[string][]string,
	canonicalKey bool) bool {
	for key, want := range toCheck {
		if canonicalKey {
			key = http.CanonicalHeaderKey(key)
		}
		values := toMatch[key]
		if values == nil {
			// Key absent (or explicitly nil): no match.
			return false
		}
		if want == "" {
			// Presence of the key is sufficient.
			continue
		}
		found := false
		for _, v := range values {
			if v == want {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}
|
|
@ -1,943 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/gorilla/context"
|
||||
)
|
||||
|
||||
// routeTest describes a single table-driven routing case: a configured
// route, a request to throw at it, and the expected outcome of the match.
type routeTest struct {
	title          string            // title of the test
	route          *Route            // the route being tested
	request        *http.Request     // a request to test the route
	vars           map[string]string // the expected vars of the match
	host           string            // the expected host of the match
	path           string            // the expected path of the match
	shouldMatch    bool              // whether the request is expected to match the route at all
	shouldRedirect bool              // whether the request should result in a redirect
}
|
||||
|
||||
// TestHost exercises host-based matching: literal hosts, hosts with ports,
// hosts taken from the request Host header, and hosts containing one or
// more {name:pattern} variables.
func TestHost(t *testing.T) {
	// newRequestHost builds a request with a method, url, and host header.
	newRequestHost := func(method, url, host string) *http.Request {
		req, err := http.NewRequest(method, url, nil)
		if err != nil {
			panic(err)
		}
		req.Host = host
		return req
	}

	tests := []routeTest{
		{
			title:       "Host route match",
			route:       new(Route).Host("aaa.bbb.ccc"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host route, wrong host in request URL",
			route:       new(Route).Host("aaa.bbb.ccc"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Host route with port, match",
			route:       new(Route).Host("aaa.bbb.ccc:1234"),
			request:     newRequest("GET", "http://aaa.bbb.ccc:1234/111/222/333"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc:1234",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host route with port, wrong port in request URL",
			route:       new(Route).Host("aaa.bbb.ccc:1234"),
			request:     newRequest("GET", "http://aaa.bbb.ccc:9999/111/222/333"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc:1234",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Host route, match with host in request header",
			route:       new(Route).Host("aaa.bbb.ccc"),
			request:     newRequestHost("GET", "/111/222/333", "aaa.bbb.ccc"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host route, wrong host in request header",
			route:       new(Route).Host("aaa.bbb.ccc"),
			request:     newRequestHost("GET", "/111/222/333", "aaa.222.ccc"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: false,
		},
		// BUG {new(Route).Host("aaa.bbb.ccc:1234"), newRequestHost("GET", "/111/222/333", "aaa.bbb.ccc:1234"), map[string]string{}, "aaa.bbb.ccc:1234", "", true},
		{
			title:       "Host route with port, wrong host in request header",
			route:       new(Route).Host("aaa.bbb.ccc:1234"),
			request:     newRequestHost("GET", "/111/222/333", "aaa.bbb.ccc:9999"),
			vars:        map[string]string{},
			host:        "aaa.bbb.ccc:1234",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Host route with pattern, match",
			route:       new(Route).Host("aaa.{v1:[a-z]{3}}.ccc"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{"v1": "bbb"},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host route with pattern, wrong host in request URL",
			route:       new(Route).Host("aaa.{v1:[a-z]{3}}.ccc"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{"v1": "bbb"},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Host route with multiple patterns, match",
			route:       new(Route).Host("{v1:[a-z]{3}}.{v2:[a-z]{3}}.{v3:[a-z]{3}}"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb", "v3": "ccc"},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host route with multiple patterns, wrong host in request URL",
			route:       new(Route).Host("{v1:[a-z]{3}}.{v2:[a-z]{3}}.{v3:[a-z]{3}}"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb", "v3": "ccc"},
			host:        "aaa.bbb.ccc",
			path:        "",
			shouldMatch: false,
		},
	}
	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestPath exercises exact path matching, including trailing-slash
// sensitivity and {name:pattern} variables in path segments.
func TestPath(t *testing.T) {
	tests := []routeTest{
		{
			title:       "Path route, match",
			route:       new(Route).Path("/111/222/333"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: true,
		},
		{
			title:       "Path route, match with trailing slash in request and path",
			route:       new(Route).Path("/111/"),
			request:     newRequest("GET", "http://localhost/111/"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111/",
			shouldMatch: true,
		},
		{
			title:       "Path route, do not match with trailing slash in path",
			route:       new(Route).Path("/111/"),
			request:     newRequest("GET", "http://localhost/111"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111",
			shouldMatch: false,
		},
		{
			title:       "Path route, do not match with trailing slash in request",
			route:       new(Route).Path("/111"),
			request:     newRequest("GET", "http://localhost/111/"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111/",
			shouldMatch: false,
		},
		{
			title:       "Path route, wrong path in request in request URL",
			route:       new(Route).Path("/111/222/333"),
			request:     newRequest("GET", "http://localhost/1/2/3"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: false,
		},
		{
			title:       "Path route with pattern, match",
			route:       new(Route).Path("/111/{v1:[0-9]{3}}/333"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{"v1": "222"},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: true,
		},
		{
			title:       "Path route with pattern, URL in request does not match",
			route:       new(Route).Path("/111/{v1:[0-9]{3}}/333"),
			request:     newRequest("GET", "http://localhost/111/aaa/333"),
			vars:        map[string]string{"v1": "222"},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: false,
		},
		{
			title:       "Path route with multiple patterns, match",
			route:       new(Route).Path("/{v1:[0-9]{3}}/{v2:[0-9]{3}}/{v3:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{"v1": "111", "v2": "222", "v3": "333"},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: true,
		},
		{
			title:       "Path route with multiple patterns, URL in request does not match",
			route:       new(Route).Path("/{v1:[0-9]{3}}/{v2:[0-9]{3}}/{v3:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/aaa/333"),
			vars:        map[string]string{"v1": "111", "v2": "222", "v3": "333"},
			host:        "",
			path:        "/111/222/333",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestPathPrefix exercises prefix matching, including the (documented)
// behavior that a prefix matches raw substrings such as "/1" against
// "/111", and prefixes containing variables.
func TestPathPrefix(t *testing.T) {
	tests := []routeTest{
		{
			title:       "PathPrefix route, match",
			route:       new(Route).PathPrefix("/111"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111",
			shouldMatch: true,
		},
		{
			title:       "PathPrefix route, match substring",
			route:       new(Route).PathPrefix("/1"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{},
			host:        "",
			path:        "/1",
			shouldMatch: true,
		},
		{
			title:       "PathPrefix route, URL prefix in request does not match",
			route:       new(Route).PathPrefix("/111"),
			request:     newRequest("GET", "http://localhost/1/2/3"),
			vars:        map[string]string{},
			host:        "",
			path:        "/111",
			shouldMatch: false,
		},
		{
			title:       "PathPrefix route with pattern, match",
			route:       new(Route).PathPrefix("/111/{v1:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{"v1": "222"},
			host:        "",
			path:        "/111/222",
			shouldMatch: true,
		},
		{
			title:       "PathPrefix route with pattern, URL prefix in request does not match",
			route:       new(Route).PathPrefix("/111/{v1:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/aaa/333"),
			vars:        map[string]string{"v1": "222"},
			host:        "",
			path:        "/111/222",
			shouldMatch: false,
		},
		{
			title:       "PathPrefix route with multiple patterns, match",
			route:       new(Route).PathPrefix("/{v1:[0-9]{3}}/{v2:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/222/333"),
			vars:        map[string]string{"v1": "111", "v2": "222"},
			host:        "",
			path:        "/111/222",
			shouldMatch: true,
		},
		{
			title:       "PathPrefix route with multiple patterns, URL prefix in request does not match",
			route:       new(Route).PathPrefix("/{v1:[0-9]{3}}/{v2:[0-9]{3}}"),
			request:     newRequest("GET", "http://localhost/111/aaa/333"),
			vars:        map[string]string{"v1": "111", "v2": "222"},
			host:        "",
			path:        "/111/222",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestHostPath exercises routes that constrain both host and path,
// with and without variables on each side.
func TestHostPath(t *testing.T) {
	tests := []routeTest{
		{
			title:       "Host and Path route, match",
			route:       new(Route).Host("aaa.bbb.ccc").Path("/111/222/333"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Host and Path route, wrong host in request URL",
			route:       new(Route).Host("aaa.bbb.ccc").Path("/111/222/333"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Host and Path route with pattern, match",
			route:       new(Route).Host("aaa.{v1:[a-z]{3}}.ccc").Path("/111/{v2:[0-9]{3}}/333"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{"v1": "bbb", "v2": "222"},
			host:        "aaa.bbb.ccc",
			path:        "/111/222/333",
			shouldMatch: true,
		},
		{
			title:       "Host and Path route with pattern, URL in request does not match",
			route:       new(Route).Host("aaa.{v1:[a-z]{3}}.ccc").Path("/111/{v2:[0-9]{3}}/333"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{"v1": "bbb", "v2": "222"},
			host:        "aaa.bbb.ccc",
			path:        "/111/222/333",
			shouldMatch: false,
		},
		{
			title:       "Host and Path route with multiple patterns, match",
			route:       new(Route).Host("{v1:[a-z]{3}}.{v2:[a-z]{3}}.{v3:[a-z]{3}}").Path("/{v4:[0-9]{3}}/{v5:[0-9]{3}}/{v6:[0-9]{3}}"),
			request:     newRequest("GET", "http://aaa.bbb.ccc/111/222/333"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb", "v3": "ccc", "v4": "111", "v5": "222", "v6": "333"},
			host:        "aaa.bbb.ccc",
			path:        "/111/222/333",
			shouldMatch: true,
		},
		{
			title:       "Host and Path route with multiple patterns, URL in request does not match",
			route:       new(Route).Host("{v1:[a-z]{3}}.{v2:[a-z]{3}}.{v3:[a-z]{3}}").Path("/{v4:[0-9]{3}}/{v5:[0-9]{3}}/{v6:[0-9]{3}}"),
			request:     newRequest("GET", "http://aaa.222.ccc/111/222/333"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb", "v3": "ccc", "v4": "111", "v5": "222", "v6": "333"},
			host:        "aaa.bbb.ccc",
			path:        "/111/222/333",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestHeaders exercises header-based matching with exact key/value pairs.
func TestHeaders(t *testing.T) {
	// newRequestHeaders creates a new request with a method, url, and headers.
	newRequestHeaders := func(method, url string, headers map[string]string) *http.Request {
		req, err := http.NewRequest(method, url, nil)
		if err != nil {
			panic(err)
		}
		for k, v := range headers {
			req.Header.Add(k, v)
		}
		return req
	}

	tests := []routeTest{
		{
			title:       "Headers route, match",
			route:       new(Route).Headers("foo", "bar", "baz", "ding"),
			request:     newRequestHeaders("GET", "http://localhost", map[string]string{"foo": "bar", "baz": "ding"}),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Headers route, bad header values",
			route:       new(Route).Headers("foo", "bar", "baz", "ding"),
			request:     newRequestHeaders("GET", "http://localhost", map[string]string{"foo": "bar", "baz": "dong"}),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}

}
|
||||
|
||||
// TestMethods exercises HTTP-method matching against an allow-list.
func TestMethods(t *testing.T) {
	tests := []routeTest{
		{
			title:       "Methods route, match GET",
			route:       new(Route).Methods("GET", "POST"),
			request:     newRequest("GET", "http://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Methods route, match POST",
			route:       new(Route).Methods("GET", "POST"),
			request:     newRequest("POST", "http://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Methods route, bad method",
			route:       new(Route).Methods("GET", "POST"),
			request:     newRequest("PUT", "http://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestQueries exercises query-string matching: literal pairs, order
// independence, and {name} / {name:pattern} value variables.
func TestQueries(t *testing.T) {
	tests := []routeTest{
		{
			title:       "Queries route, match",
			route:       new(Route).Queries("foo", "bar", "baz", "ding"),
			request:     newRequest("GET", "http://localhost?foo=bar&baz=ding"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route, match with a query string",
			route:       new(Route).Host("www.example.com").Path("/api").Queries("foo", "bar", "baz", "ding"),
			request:     newRequest("GET", "http://www.example.com/api?foo=bar&baz=ding"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route, match with a query string out of order",
			route:       new(Route).Host("www.example.com").Path("/api").Queries("foo", "bar", "baz", "ding"),
			request:     newRequest("GET", "http://www.example.com/api?baz=ding&foo=bar"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route, bad query",
			route:       new(Route).Queries("foo", "bar", "baz", "ding"),
			request:     newRequest("GET", "http://localhost?foo=bar&baz=dong"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
		{
			title:       "Queries route with pattern, match",
			route:       new(Route).Queries("foo", "{v1}"),
			request:     newRequest("GET", "http://localhost?foo=bar"),
			vars:        map[string]string{"v1": "bar"},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route with multiple patterns, match",
			route:       new(Route).Queries("foo", "{v1}", "baz", "{v2}"),
			request:     newRequest("GET", "http://localhost?foo=bar&baz=ding"),
			vars:        map[string]string{"v1": "bar", "v2": "ding"},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route with regexp pattern, match",
			route:       new(Route).Queries("foo", "{v1:[0-9]+}"),
			request:     newRequest("GET", "http://localhost?foo=10"),
			vars:        map[string]string{"v1": "10"},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Queries route with regexp pattern, regexp does not match",
			route:       new(Route).Queries("foo", "{v1:[0-9]+}"),
			request:     newRequest("GET", "http://localhost?foo=a"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestSchemes exercises URL-scheme matching against an allow-list.
func TestSchemes(t *testing.T) {
	tests := []routeTest{
		// Schemes
		{
			title:       "Schemes route, match https",
			route:       new(Route).Schemes("https", "ftp"),
			request:     newRequest("GET", "https://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Schemes route, match ftp",
			route:       new(Route).Schemes("https", "ftp"),
			request:     newRequest("GET", "ftp://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "Schemes route, bad scheme",
			route:       new(Route).Schemes("https", "ftp"),
			request:     newRequest("GET", "http://localhost"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
	}
	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestMatcherFunc exercises custom matcher functions; the matcher here
// accepts only requests whose URL host is exactly "aaa.bbb.ccc".
func TestMatcherFunc(t *testing.T) {
	m := func(r *http.Request, m *RouteMatch) bool {
		if r.URL.Host == "aaa.bbb.ccc" {
			return true
		}
		return false
	}

	tests := []routeTest{
		{
			title:       "MatchFunc route, match",
			route:       new(Route).MatcherFunc(m),
			request:     newRequest("GET", "http://aaa.bbb.ccc"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: true,
		},
		{
			title:       "MatchFunc route, non-match",
			route:       new(Route).MatcherFunc(m),
			request:     newRequest("GET", "http://aaa.222.ccc"),
			vars:        map[string]string{},
			host:        "",
			path:        "",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestSubRouter exercises subrouters created from host- and
// prefix-constrained routes; variables from parent and child routes
// are merged in the match.
func TestSubRouter(t *testing.T) {
	subrouter1 := new(Route).Host("{v1:[a-z]+}.google.com").Subrouter()
	subrouter2 := new(Route).PathPrefix("/foo/{v1}").Subrouter()

	tests := []routeTest{
		{
			route:       subrouter1.Path("/{v2:[a-z]+}"),
			request:     newRequest("GET", "http://aaa.google.com/bbb"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb"},
			host:        "aaa.google.com",
			path:        "/bbb",
			shouldMatch: true,
		},
		{
			route:       subrouter1.Path("/{v2:[a-z]+}"),
			request:     newRequest("GET", "http://111.google.com/111"),
			vars:        map[string]string{"v1": "aaa", "v2": "bbb"},
			host:        "aaa.google.com",
			path:        "/bbb",
			shouldMatch: false,
		},
		{
			route:       subrouter2.Path("/baz/{v2}"),
			request:     newRequest("GET", "http://localhost/foo/bar/baz/ding"),
			vars:        map[string]string{"v1": "bar", "v2": "ding"},
			host:        "",
			path:        "/foo/bar/baz/ding",
			shouldMatch: true,
		},
		{
			route:       subrouter2.Path("/baz/{v2}"),
			request:     newRequest("GET", "http://localhost/foo/bar"),
			vars:        map[string]string{"v1": "bar", "v2": "ding"},
			host:        "",
			path:        "/foo/bar/baz/ding",
			shouldMatch: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// TestNamedRoutes verifies that names registered on nested subrouters
// all land in the root router's shared name registry (see
// Router.getNamedRoutes) and are retrievable via Get().
func TestNamedRoutes(t *testing.T) {
	r1 := NewRouter()
	r1.NewRoute().Name("a")
	r1.NewRoute().Name("b")
	r1.NewRoute().Name("c")

	r2 := r1.NewRoute().Subrouter()
	r2.NewRoute().Name("d")
	r2.NewRoute().Name("e")
	r2.NewRoute().Name("f")

	r3 := r2.NewRoute().Subrouter()
	r3.NewRoute().Name("g")
	r3.NewRoute().Name("h")
	r3.NewRoute().Name("i")

	// All nine names, including those from the grandchild subrouter,
	// must be visible at the root.
	if r1.namedRoutes == nil || len(r1.namedRoutes) != 9 {
		t.Errorf("Expected 9 named routes, got %v", r1.namedRoutes)
	} else if r1.Get("i") == nil {
		t.Errorf("Subroute name not registered")
	}
}
|
||||
|
||||
// TestStrictSlash exercises Router.StrictSlash(true): requests whose
// trailing slash disagrees with the route definition redirect, the
// setting propagates into subrouters, and pure path prefixes are exempt.
func TestStrictSlash(t *testing.T) {
	r := NewRouter()
	r.StrictSlash(true)

	tests := []routeTest{
		{
			title:          "Redirect path without slash",
			route:          r.NewRoute().Path("/111/"),
			request:        newRequest("GET", "http://localhost/111"),
			vars:           map[string]string{},
			host:           "",
			path:           "/111/",
			shouldMatch:    true,
			shouldRedirect: true,
		},
		{
			title:          "Do not redirect path with slash",
			route:          r.NewRoute().Path("/111/"),
			request:        newRequest("GET", "http://localhost/111/"),
			vars:           map[string]string{},
			host:           "",
			path:           "/111/",
			shouldMatch:    true,
			shouldRedirect: false,
		},
		{
			title:          "Redirect path with slash",
			route:          r.NewRoute().Path("/111"),
			request:        newRequest("GET", "http://localhost/111/"),
			vars:           map[string]string{},
			host:           "",
			path:           "/111",
			shouldMatch:    true,
			shouldRedirect: true,
		},
		{
			title:          "Do not redirect path without slash",
			route:          r.NewRoute().Path("/111"),
			request:        newRequest("GET", "http://localhost/111"),
			vars:           map[string]string{},
			host:           "",
			path:           "/111",
			shouldMatch:    true,
			shouldRedirect: false,
		},
		{
			title:          "Propagate StrictSlash to subrouters",
			route:          r.NewRoute().PathPrefix("/static/").Subrouter().Path("/images/"),
			request:        newRequest("GET", "http://localhost/static/images"),
			vars:           map[string]string{},
			host:           "",
			path:           "/static/images/",
			shouldMatch:    true,
			shouldRedirect: true,
		},
		{
			title:          "Ignore StrictSlash for path prefix",
			route:          r.NewRoute().PathPrefix("/static/"),
			request:        newRequest("GET", "http://localhost/static/logo.png"),
			vars:           map[string]string{},
			host:           "",
			path:           "/static/",
			shouldMatch:    true,
			shouldRedirect: false,
		},
	}

	for _, test := range tests {
		testRoute(t, test)
	}
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func getRouteTemplate(route *Route) string {
|
||||
host, path := "none", "none"
|
||||
if route.regexp != nil {
|
||||
if route.regexp.host != nil {
|
||||
host = route.regexp.host.template
|
||||
}
|
||||
if route.regexp.path != nil {
|
||||
path = route.regexp.path.template
|
||||
}
|
||||
}
|
||||
return fmt.Sprintf("Host: %v, Path: %v", host, path)
|
||||
}
|
||||
|
||||
// testRoute runs a single routeTest case: it matches the request against
// the route, then (on an expected match) checks the extracted vars, the
// URLs reconstructed from the match via URLHost/URLPath/URL, and the
// redirect behavior implied by shouldRedirect.
func testRoute(t *testing.T, test routeTest) {
	request := test.request
	route := test.route
	vars := test.vars
	shouldMatch := test.shouldMatch
	host := test.host
	path := test.path
	url := test.host + test.path
	shouldRedirect := test.shouldRedirect

	var match RouteMatch
	ok := route.Match(request, &match)
	if ok != shouldMatch {
		msg := "Should match"
		if !shouldMatch {
			msg = "Should not match"
		}
		t.Errorf("(%v) %v:\nRoute: %#v\nRequest: %#v\nVars: %v\n", test.title, msg, route, request, vars)
		return
	}
	if shouldMatch {
		// Extracted variables must equal the expected set.
		if test.vars != nil && !stringMapEqual(test.vars, match.Vars) {
			t.Errorf("(%v) Vars not equal: expected %v, got %v", test.title, vars, match.Vars)
			return
		}
		// Rebuilding the host from the matched vars must round-trip.
		if host != "" {
			u, _ := test.route.URLHost(mapToPairs(match.Vars)...)
			if host != u.Host {
				t.Errorf("(%v) URLHost not equal: expected %v, got %v -- %v", test.title, host, u.Host, getRouteTemplate(route))
				return
			}
		}
		// Rebuilding the path from the matched vars must round-trip.
		if path != "" {
			u, _ := route.URLPath(mapToPairs(match.Vars)...)
			if path != u.Path {
				t.Errorf("(%v) URLPath not equal: expected %v, got %v -- %v", test.title, path, u.Path, getRouteTemplate(route))
				return
			}
		}
		// Rebuilding the full URL (host+path) must round-trip.
		if url != "" {
			u, _ := route.URL(mapToPairs(match.Vars)...)
			if url != u.Host+u.Path {
				t.Errorf("(%v) URL not equal: expected %v, got %v -- %v", test.title, url, u.Host+u.Path, getRouteTemplate(route))
				return
			}
		}
		// A redirect is signalled by a non-nil handler on the match.
		if shouldRedirect && match.Handler == nil {
			t.Errorf("(%v) Did not redirect", test.title)
			return
		}
		if !shouldRedirect && match.Handler != nil {
			t.Errorf("(%v) Unexpected redirect", test.title)
			return
		}
	}
}
|
||||
|
||||
// Tests that the context is cleared or not cleared properly depending on
|
||||
// the configuration of the router
|
||||
func TestKeepContext(t *testing.T) {
|
||||
func1 := func(w http.ResponseWriter, r *http.Request) {}
|
||||
|
||||
r := NewRouter()
|
||||
r.HandleFunc("/", func1).Name("func1")
|
||||
|
||||
req, _ := http.NewRequest("GET", "http://localhost/", nil)
|
||||
context.Set(req, "t", 1)
|
||||
|
||||
res := new(http.ResponseWriter)
|
||||
r.ServeHTTP(*res, req)
|
||||
|
||||
if _, ok := context.GetOk(req, "t"); ok {
|
||||
t.Error("Context should have been cleared at end of request")
|
||||
}
|
||||
|
||||
r.KeepContext = true
|
||||
|
||||
req, _ = http.NewRequest("GET", "http://localhost/", nil)
|
||||
context.Set(req, "t", 1)
|
||||
|
||||
r.ServeHTTP(*res, req)
|
||||
if _, ok := context.GetOk(req, "t"); !ok {
|
||||
t.Error("Context should NOT have been cleared at end of request")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type TestA301ResponseWriter struct {
|
||||
hh http.Header
|
||||
status int
|
||||
}
|
||||
|
||||
func (ho TestA301ResponseWriter) Header() http.Header {
|
||||
return http.Header(ho.hh)
|
||||
}
|
||||
|
||||
func (ho TestA301ResponseWriter) Write(b []byte) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (ho TestA301ResponseWriter) WriteHeader(code int) {
|
||||
ho.status = code
|
||||
}
|
||||
|
||||
func Test301Redirect(t *testing.T) {
|
||||
m := make(http.Header)
|
||||
|
||||
func1 := func(w http.ResponseWriter, r *http.Request) {}
|
||||
func2 := func(w http.ResponseWriter, r *http.Request) {}
|
||||
|
||||
r := NewRouter()
|
||||
r.HandleFunc("/api/", func2).Name("func2")
|
||||
r.HandleFunc("/", func1).Name("func1")
|
||||
|
||||
req, _ := http.NewRequest("GET", "http://localhost//api/?abc=def", nil)
|
||||
|
||||
res := TestA301ResponseWriter{
|
||||
hh: m,
|
||||
status: 0,
|
||||
}
|
||||
r.ServeHTTP(&res, req)
|
||||
|
||||
if "http://localhost/api/?abc=def" != res.hh["Location"][0] {
|
||||
t.Errorf("Should have complete URL with query string")
|
||||
}
|
||||
}
|
||||
|
||||
// https://plus.google.com/101022900381697718949/posts/eWy6DjFJ6uW
|
||||
func TestSubrouterHeader(t *testing.T) {
|
||||
expected := "func1 response"
|
||||
func1 := func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprint(w, expected)
|
||||
}
|
||||
func2 := func(http.ResponseWriter, *http.Request) {}
|
||||
|
||||
r := NewRouter()
|
||||
s := r.Headers("SomeSpecialHeader", "").Subrouter()
|
||||
s.HandleFunc("/", func1).Name("func1")
|
||||
r.HandleFunc("/", func2).Name("func2")
|
||||
|
||||
req, _ := http.NewRequest("GET", "http://localhost/", nil)
|
||||
req.Header.Add("SomeSpecialHeader", "foo")
|
||||
match := new(RouteMatch)
|
||||
matched := r.Match(req, match)
|
||||
if !matched {
|
||||
t.Errorf("Should match request")
|
||||
}
|
||||
if match.Route.GetName() != "func1" {
|
||||
t.Errorf("Expecting func1 handler, got %s", match.Route.GetName())
|
||||
}
|
||||
resp := NewRecorder()
|
||||
match.Handler.ServeHTTP(resp, req)
|
||||
if resp.Body.String() != expected {
|
||||
t.Errorf("Expecting %q", expected)
|
||||
}
|
||||
}
|
||||
|
||||
// mapToPairs converts a string map to a slice of string pairs
|
||||
func mapToPairs(m map[string]string) []string {
|
||||
var i int
|
||||
p := make([]string, len(m)*2)
|
||||
for k, v := range m {
|
||||
p[i] = k
|
||||
p[i+1] = v
|
||||
i += 2
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
// stringMapEqual checks the equality of two string maps
|
||||
func stringMapEqual(m1, m2 map[string]string) bool {
|
||||
nil1 := m1 == nil
|
||||
nil2 := m2 == nil
|
||||
if nil1 != nil2 || len(m1) != len(m2) {
|
||||
return false
|
||||
}
|
||||
for k, v := range m1 {
|
||||
if v != m2[k] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// newRequest is a helper function to create a new request with a method and url
|
||||
func newRequest(method, url string) *http.Request {
|
||||
req, err := http.NewRequest(method, url, nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return req
|
||||
}
|
|
@ -1,714 +0,0 @@
|
|||
// Old tests ported to Go1. This is a mess. Want to drop it one day.
|
||||
|
||||
// Copyright 2011 Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// ResponseRecorder
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// ResponseRecorder is an implementation of http.ResponseWriter that
|
||||
// records its mutations for later inspection in tests.
|
||||
type ResponseRecorder struct {
|
||||
Code int // the HTTP response code from WriteHeader
|
||||
HeaderMap http.Header // the HTTP response headers
|
||||
Body *bytes.Buffer // if non-nil, the bytes.Buffer to append written data to
|
||||
Flushed bool
|
||||
}
|
||||
|
||||
// NewRecorder returns an initialized ResponseRecorder.
|
||||
func NewRecorder() *ResponseRecorder {
|
||||
return &ResponseRecorder{
|
||||
HeaderMap: make(http.Header),
|
||||
Body: new(bytes.Buffer),
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultRemoteAddr is the default remote address to return in RemoteAddr if
|
||||
// an explicit DefaultRemoteAddr isn't set on ResponseRecorder.
|
||||
const DefaultRemoteAddr = "1.2.3.4"
|
||||
|
||||
// Header returns the response headers.
|
||||
func (rw *ResponseRecorder) Header() http.Header {
|
||||
return rw.HeaderMap
|
||||
}
|
||||
|
||||
// Write always succeeds and writes to rw.Body, if not nil.
|
||||
func (rw *ResponseRecorder) Write(buf []byte) (int, error) {
|
||||
if rw.Body != nil {
|
||||
rw.Body.Write(buf)
|
||||
}
|
||||
if rw.Code == 0 {
|
||||
rw.Code = http.StatusOK
|
||||
}
|
||||
return len(buf), nil
|
||||
}
|
||||
|
||||
// WriteHeader sets rw.Code.
|
||||
func (rw *ResponseRecorder) WriteHeader(code int) {
|
||||
rw.Code = code
|
||||
}
|
||||
|
||||
// Flush sets rw.Flushed to true.
|
||||
func (rw *ResponseRecorder) Flush() {
|
||||
rw.Flushed = true
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
func TestRouteMatchers(t *testing.T) {
|
||||
var scheme, host, path, query, method string
|
||||
var headers map[string]string
|
||||
var resultVars map[bool]map[string]string
|
||||
|
||||
router := NewRouter()
|
||||
router.NewRoute().Host("{var1}.google.com").
|
||||
Path("/{var2:[a-z]+}/{var3:[0-9]+}").
|
||||
Queries("foo", "bar").
|
||||
Methods("GET").
|
||||
Schemes("https").
|
||||
Headers("x-requested-with", "XMLHttpRequest")
|
||||
router.NewRoute().Host("www.{var4}.com").
|
||||
PathPrefix("/foo/{var5:[a-z]+}/{var6:[0-9]+}").
|
||||
Queries("baz", "ding").
|
||||
Methods("POST").
|
||||
Schemes("http").
|
||||
Headers("Content-Type", "application/json")
|
||||
|
||||
reset := func() {
|
||||
// Everything match.
|
||||
scheme = "https"
|
||||
host = "www.google.com"
|
||||
path = "/product/42"
|
||||
query = "?foo=bar"
|
||||
method = "GET"
|
||||
headers = map[string]string{"X-Requested-With": "XMLHttpRequest"}
|
||||
resultVars = map[bool]map[string]string{
|
||||
true: {"var1": "www", "var2": "product", "var3": "42"},
|
||||
false: {},
|
||||
}
|
||||
}
|
||||
|
||||
reset2 := func() {
|
||||
// Everything match.
|
||||
scheme = "http"
|
||||
host = "www.google.com"
|
||||
path = "/foo/product/42/path/that/is/ignored"
|
||||
query = "?baz=ding"
|
||||
method = "POST"
|
||||
headers = map[string]string{"Content-Type": "application/json"}
|
||||
resultVars = map[bool]map[string]string{
|
||||
true: {"var4": "google", "var5": "product", "var6": "42"},
|
||||
false: {},
|
||||
}
|
||||
}
|
||||
|
||||
match := func(shouldMatch bool) {
|
||||
url := scheme + "://" + host + path + query
|
||||
request, _ := http.NewRequest(method, url, nil)
|
||||
for key, value := range headers {
|
||||
request.Header.Add(key, value)
|
||||
}
|
||||
|
||||
var routeMatch RouteMatch
|
||||
matched := router.Match(request, &routeMatch)
|
||||
if matched != shouldMatch {
|
||||
// Need better messages. :)
|
||||
if matched {
|
||||
t.Errorf("Should match.")
|
||||
} else {
|
||||
t.Errorf("Should not match.")
|
||||
}
|
||||
}
|
||||
|
||||
if matched {
|
||||
currentRoute := routeMatch.Route
|
||||
if currentRoute == nil {
|
||||
t.Errorf("Expected a current route.")
|
||||
}
|
||||
vars := routeMatch.Vars
|
||||
expectedVars := resultVars[shouldMatch]
|
||||
if len(vars) != len(expectedVars) {
|
||||
t.Errorf("Expected vars: %v Got: %v.", expectedVars, vars)
|
||||
}
|
||||
for name, value := range vars {
|
||||
if expectedVars[name] != value {
|
||||
t.Errorf("Expected vars: %v Got: %v.", expectedVars, vars)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 1st route --------------------------------------------------------------
|
||||
|
||||
// Everything match.
|
||||
reset()
|
||||
match(true)
|
||||
|
||||
// Scheme doesn't match.
|
||||
reset()
|
||||
scheme = "http"
|
||||
match(false)
|
||||
|
||||
// Host doesn't match.
|
||||
reset()
|
||||
host = "www.mygoogle.com"
|
||||
match(false)
|
||||
|
||||
// Path doesn't match.
|
||||
reset()
|
||||
path = "/product/notdigits"
|
||||
match(false)
|
||||
|
||||
// Query doesn't match.
|
||||
reset()
|
||||
query = "?foo=baz"
|
||||
match(false)
|
||||
|
||||
// Method doesn't match.
|
||||
reset()
|
||||
method = "POST"
|
||||
match(false)
|
||||
|
||||
// Header doesn't match.
|
||||
reset()
|
||||
headers = map[string]string{}
|
||||
match(false)
|
||||
|
||||
// Everything match, again.
|
||||
reset()
|
||||
match(true)
|
||||
|
||||
// 2nd route --------------------------------------------------------------
|
||||
|
||||
// Everything match.
|
||||
reset2()
|
||||
match(true)
|
||||
|
||||
// Scheme doesn't match.
|
||||
reset2()
|
||||
scheme = "https"
|
||||
match(false)
|
||||
|
||||
// Host doesn't match.
|
||||
reset2()
|
||||
host = "sub.google.com"
|
||||
match(false)
|
||||
|
||||
// Path doesn't match.
|
||||
reset2()
|
||||
path = "/bar/product/42"
|
||||
match(false)
|
||||
|
||||
// Query doesn't match.
|
||||
reset2()
|
||||
query = "?foo=baz"
|
||||
match(false)
|
||||
|
||||
// Method doesn't match.
|
||||
reset2()
|
||||
method = "GET"
|
||||
match(false)
|
||||
|
||||
// Header doesn't match.
|
||||
reset2()
|
||||
headers = map[string]string{}
|
||||
match(false)
|
||||
|
||||
// Everything match, again.
|
||||
reset2()
|
||||
match(true)
|
||||
}
|
||||
|
||||
type headerMatcherTest struct {
|
||||
matcher headerMatcher
|
||||
headers map[string]string
|
||||
result bool
|
||||
}
|
||||
|
||||
var headerMatcherTests = []headerMatcherTest{
|
||||
{
|
||||
matcher: headerMatcher(map[string]string{"x-requested-with": "XMLHttpRequest"}),
|
||||
headers: map[string]string{"X-Requested-With": "XMLHttpRequest"},
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: headerMatcher(map[string]string{"x-requested-with": ""}),
|
||||
headers: map[string]string{"X-Requested-With": "anything"},
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: headerMatcher(map[string]string{"x-requested-with": "XMLHttpRequest"}),
|
||||
headers: map[string]string{},
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
type hostMatcherTest struct {
|
||||
matcher *Route
|
||||
url string
|
||||
vars map[string]string
|
||||
result bool
|
||||
}
|
||||
|
||||
var hostMatcherTests = []hostMatcherTest{
|
||||
{
|
||||
matcher: NewRouter().NewRoute().Host("{foo:[a-z][a-z][a-z]}.{bar:[a-z][a-z][a-z]}.{baz:[a-z][a-z][a-z]}"),
|
||||
url: "http://abc.def.ghi/",
|
||||
vars: map[string]string{"foo": "abc", "bar": "def", "baz": "ghi"},
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: NewRouter().NewRoute().Host("{foo:[a-z][a-z][a-z]}.{bar:[a-z][a-z][a-z]}.{baz:[a-z][a-z][a-z]}"),
|
||||
url: "http://a.b.c/",
|
||||
vars: map[string]string{"foo": "abc", "bar": "def", "baz": "ghi"},
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
type methodMatcherTest struct {
|
||||
matcher methodMatcher
|
||||
method string
|
||||
result bool
|
||||
}
|
||||
|
||||
var methodMatcherTests = []methodMatcherTest{
|
||||
{
|
||||
matcher: methodMatcher([]string{"GET", "POST", "PUT"}),
|
||||
method: "GET",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: methodMatcher([]string{"GET", "POST", "PUT"}),
|
||||
method: "POST",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: methodMatcher([]string{"GET", "POST", "PUT"}),
|
||||
method: "PUT",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: methodMatcher([]string{"GET", "POST", "PUT"}),
|
||||
method: "DELETE",
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
type pathMatcherTest struct {
|
||||
matcher *Route
|
||||
url string
|
||||
vars map[string]string
|
||||
result bool
|
||||
}
|
||||
|
||||
var pathMatcherTests = []pathMatcherTest{
|
||||
{
|
||||
matcher: NewRouter().NewRoute().Path("/{foo:[0-9][0-9][0-9]}/{bar:[0-9][0-9][0-9]}/{baz:[0-9][0-9][0-9]}"),
|
||||
url: "http://localhost:8080/123/456/789",
|
||||
vars: map[string]string{"foo": "123", "bar": "456", "baz": "789"},
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: NewRouter().NewRoute().Path("/{foo:[0-9][0-9][0-9]}/{bar:[0-9][0-9][0-9]}/{baz:[0-9][0-9][0-9]}"),
|
||||
url: "http://localhost:8080/1/2/3",
|
||||
vars: map[string]string{"foo": "123", "bar": "456", "baz": "789"},
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
type schemeMatcherTest struct {
|
||||
matcher schemeMatcher
|
||||
url string
|
||||
result bool
|
||||
}
|
||||
|
||||
var schemeMatcherTests = []schemeMatcherTest{
|
||||
{
|
||||
matcher: schemeMatcher([]string{"http", "https"}),
|
||||
url: "http://localhost:8080/",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: schemeMatcher([]string{"http", "https"}),
|
||||
url: "https://localhost:8080/",
|
||||
result: true,
|
||||
},
|
||||
{
|
||||
matcher: schemeMatcher([]string{"https"}),
|
||||
url: "http://localhost:8080/",
|
||||
result: false,
|
||||
},
|
||||
{
|
||||
matcher: schemeMatcher([]string{"http"}),
|
||||
url: "https://localhost:8080/",
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
type urlBuildingTest struct {
|
||||
route *Route
|
||||
vars []string
|
||||
url string
|
||||
}
|
||||
|
||||
var urlBuildingTests = []urlBuildingTest{
|
||||
{
|
||||
route: new(Route).Host("foo.domain.com"),
|
||||
vars: []string{},
|
||||
url: "http://foo.domain.com",
|
||||
},
|
||||
{
|
||||
route: new(Route).Host("{subdomain}.domain.com"),
|
||||
vars: []string{"subdomain", "bar"},
|
||||
url: "http://bar.domain.com",
|
||||
},
|
||||
{
|
||||
route: new(Route).Host("foo.domain.com").Path("/articles"),
|
||||
vars: []string{},
|
||||
url: "http://foo.domain.com/articles",
|
||||
},
|
||||
{
|
||||
route: new(Route).Path("/articles"),
|
||||
vars: []string{},
|
||||
url: "/articles",
|
||||
},
|
||||
{
|
||||
route: new(Route).Path("/articles/{category}/{id:[0-9]+}"),
|
||||
vars: []string{"category", "technology", "id", "42"},
|
||||
url: "/articles/technology/42",
|
||||
},
|
||||
{
|
||||
route: new(Route).Host("{subdomain}.domain.com").Path("/articles/{category}/{id:[0-9]+}"),
|
||||
vars: []string{"subdomain", "foo", "category", "technology", "id", "42"},
|
||||
url: "http://foo.domain.com/articles/technology/42",
|
||||
},
|
||||
}
|
||||
|
||||
func TestHeaderMatcher(t *testing.T) {
|
||||
for _, v := range headerMatcherTests {
|
||||
request, _ := http.NewRequest("GET", "http://localhost:8080/", nil)
|
||||
for key, value := range v.headers {
|
||||
request.Header.Add(key, value)
|
||||
}
|
||||
var routeMatch RouteMatch
|
||||
result := v.matcher.Match(request, &routeMatch)
|
||||
if result != v.result {
|
||||
if v.result {
|
||||
t.Errorf("%#v: should match %v.", v.matcher, request.Header)
|
||||
} else {
|
||||
t.Errorf("%#v: should not match %v.", v.matcher, request.Header)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostMatcher(t *testing.T) {
|
||||
for _, v := range hostMatcherTests {
|
||||
request, _ := http.NewRequest("GET", v.url, nil)
|
||||
var routeMatch RouteMatch
|
||||
result := v.matcher.Match(request, &routeMatch)
|
||||
vars := routeMatch.Vars
|
||||
if result != v.result {
|
||||
if v.result {
|
||||
t.Errorf("%#v: should match %v.", v.matcher, v.url)
|
||||
} else {
|
||||
t.Errorf("%#v: should not match %v.", v.matcher, v.url)
|
||||
}
|
||||
}
|
||||
if result {
|
||||
if len(vars) != len(v.vars) {
|
||||
t.Errorf("%#v: vars length should be %v, got %v.", v.matcher, len(v.vars), len(vars))
|
||||
}
|
||||
for name, value := range vars {
|
||||
if v.vars[name] != value {
|
||||
t.Errorf("%#v: expected value %v for key %v, got %v.", v.matcher, v.vars[name], name, value)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if len(vars) != 0 {
|
||||
t.Errorf("%#v: vars length should be 0, got %v.", v.matcher, len(vars))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMethodMatcher(t *testing.T) {
|
||||
for _, v := range methodMatcherTests {
|
||||
request, _ := http.NewRequest(v.method, "http://localhost:8080/", nil)
|
||||
var routeMatch RouteMatch
|
||||
result := v.matcher.Match(request, &routeMatch)
|
||||
if result != v.result {
|
||||
if v.result {
|
||||
t.Errorf("%#v: should match %v.", v.matcher, v.method)
|
||||
} else {
|
||||
t.Errorf("%#v: should not match %v.", v.matcher, v.method)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPathMatcher(t *testing.T) {
|
||||
for _, v := range pathMatcherTests {
|
||||
request, _ := http.NewRequest("GET", v.url, nil)
|
||||
var routeMatch RouteMatch
|
||||
result := v.matcher.Match(request, &routeMatch)
|
||||
vars := routeMatch.Vars
|
||||
if result != v.result {
|
||||
if v.result {
|
||||
t.Errorf("%#v: should match %v.", v.matcher, v.url)
|
||||
} else {
|
||||
t.Errorf("%#v: should not match %v.", v.matcher, v.url)
|
||||
}
|
||||
}
|
||||
if result {
|
||||
if len(vars) != len(v.vars) {
|
||||
t.Errorf("%#v: vars length should be %v, got %v.", v.matcher, len(v.vars), len(vars))
|
||||
}
|
||||
for name, value := range vars {
|
||||
if v.vars[name] != value {
|
||||
t.Errorf("%#v: expected value %v for key %v, got %v.", v.matcher, v.vars[name], name, value)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if len(vars) != 0 {
|
||||
t.Errorf("%#v: vars length should be 0, got %v.", v.matcher, len(vars))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSchemeMatcher(t *testing.T) {
|
||||
for _, v := range schemeMatcherTests {
|
||||
request, _ := http.NewRequest("GET", v.url, nil)
|
||||
var routeMatch RouteMatch
|
||||
result := v.matcher.Match(request, &routeMatch)
|
||||
if result != v.result {
|
||||
if v.result {
|
||||
t.Errorf("%#v: should match %v.", v.matcher, v.url)
|
||||
} else {
|
||||
t.Errorf("%#v: should not match %v.", v.matcher, v.url)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUrlBuilding(t *testing.T) {
|
||||
|
||||
for _, v := range urlBuildingTests {
|
||||
u, _ := v.route.URL(v.vars...)
|
||||
url := u.String()
|
||||
if url != v.url {
|
||||
t.Errorf("expected %v, got %v", v.url, url)
|
||||
/*
|
||||
reversePath := ""
|
||||
reverseHost := ""
|
||||
if v.route.pathTemplate != nil {
|
||||
reversePath = v.route.pathTemplate.Reverse
|
||||
}
|
||||
if v.route.hostTemplate != nil {
|
||||
reverseHost = v.route.hostTemplate.Reverse
|
||||
}
|
||||
|
||||
t.Errorf("%#v:\nexpected: %q\ngot: %q\nreverse path: %q\nreverse host: %q", v.route, v.url, url, reversePath, reverseHost)
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
ArticleHandler := func(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
router := NewRouter()
|
||||
router.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).Name("article")
|
||||
|
||||
url, _ := router.Get("article").URL("category", "technology", "id", "42")
|
||||
expected := "/articles/technology/42"
|
||||
if url.String() != expected {
|
||||
t.Errorf("Expected %v, got %v", expected, url.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchedRouteName(t *testing.T) {
|
||||
routeName := "stock"
|
||||
router := NewRouter()
|
||||
route := router.NewRoute().Path("/products/").Name(routeName)
|
||||
|
||||
url := "http://www.domain.com/products/"
|
||||
request, _ := http.NewRequest("GET", url, nil)
|
||||
var rv RouteMatch
|
||||
ok := router.Match(request, &rv)
|
||||
|
||||
if !ok || rv.Route != route {
|
||||
t.Errorf("Expected same route, got %+v.", rv.Route)
|
||||
}
|
||||
|
||||
retName := rv.Route.GetName()
|
||||
if retName != routeName {
|
||||
t.Errorf("Expected %q, got %q.", routeName, retName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubRouting(t *testing.T) {
|
||||
// Example from docs.
|
||||
router := NewRouter()
|
||||
subrouter := router.NewRoute().Host("www.domain.com").Subrouter()
|
||||
route := subrouter.NewRoute().Path("/products/").Name("products")
|
||||
|
||||
url := "http://www.domain.com/products/"
|
||||
request, _ := http.NewRequest("GET", url, nil)
|
||||
var rv RouteMatch
|
||||
ok := router.Match(request, &rv)
|
||||
|
||||
if !ok || rv.Route != route {
|
||||
t.Errorf("Expected same route, got %+v.", rv.Route)
|
||||
}
|
||||
|
||||
u, _ := router.Get("products").URL()
|
||||
builtUrl := u.String()
|
||||
// Yay, subroute aware of the domain when building!
|
||||
if builtUrl != url {
|
||||
t.Errorf("Expected %q, got %q.", url, builtUrl)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVariableNames(t *testing.T) {
|
||||
route := new(Route).Host("{arg1}.domain.com").Path("/{arg1}/{arg2:[0-9]+}")
|
||||
if route.err == nil {
|
||||
t.Errorf("Expected error for duplicated variable names")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedirectSlash(t *testing.T) {
|
||||
var route *Route
|
||||
var routeMatch RouteMatch
|
||||
r := NewRouter()
|
||||
|
||||
r.StrictSlash(false)
|
||||
route = r.NewRoute()
|
||||
if route.strictSlash != false {
|
||||
t.Errorf("Expected false redirectSlash.")
|
||||
}
|
||||
|
||||
r.StrictSlash(true)
|
||||
route = r.NewRoute()
|
||||
if route.strictSlash != true {
|
||||
t.Errorf("Expected true redirectSlash.")
|
||||
}
|
||||
|
||||
route = new(Route)
|
||||
route.strictSlash = true
|
||||
route.Path("/{arg1}/{arg2:[0-9]+}/")
|
||||
request, _ := http.NewRequest("GET", "http://localhost/foo/123", nil)
|
||||
routeMatch = RouteMatch{}
|
||||
_ = route.Match(request, &routeMatch)
|
||||
vars := routeMatch.Vars
|
||||
if vars["arg1"] != "foo" {
|
||||
t.Errorf("Expected foo.")
|
||||
}
|
||||
if vars["arg2"] != "123" {
|
||||
t.Errorf("Expected 123.")
|
||||
}
|
||||
rsp := NewRecorder()
|
||||
routeMatch.Handler.ServeHTTP(rsp, request)
|
||||
if rsp.HeaderMap.Get("Location") != "http://localhost/foo/123/" {
|
||||
t.Errorf("Expected redirect header.")
|
||||
}
|
||||
|
||||
route = new(Route)
|
||||
route.strictSlash = true
|
||||
route.Path("/{arg1}/{arg2:[0-9]+}")
|
||||
request, _ = http.NewRequest("GET", "http://localhost/foo/123/", nil)
|
||||
routeMatch = RouteMatch{}
|
||||
_ = route.Match(request, &routeMatch)
|
||||
vars = routeMatch.Vars
|
||||
if vars["arg1"] != "foo" {
|
||||
t.Errorf("Expected foo.")
|
||||
}
|
||||
if vars["arg2"] != "123" {
|
||||
t.Errorf("Expected 123.")
|
||||
}
|
||||
rsp = NewRecorder()
|
||||
routeMatch.Handler.ServeHTTP(rsp, request)
|
||||
if rsp.HeaderMap.Get("Location") != "http://localhost/foo/123" {
|
||||
t.Errorf("Expected redirect header.")
|
||||
}
|
||||
}
|
||||
|
||||
// Test for the new regexp library, still not available in stable Go.
|
||||
func TestNewRegexp(t *testing.T) {
|
||||
var p *routeRegexp
|
||||
var matches []string
|
||||
|
||||
tests := map[string]map[string][]string{
|
||||
"/{foo:a{2}}": {
|
||||
"/a": nil,
|
||||
"/aa": {"aa"},
|
||||
"/aaa": nil,
|
||||
"/aaaa": nil,
|
||||
},
|
||||
"/{foo:a{2,}}": {
|
||||
"/a": nil,
|
||||
"/aa": {"aa"},
|
||||
"/aaa": {"aaa"},
|
||||
"/aaaa": {"aaaa"},
|
||||
},
|
||||
"/{foo:a{2,3}}": {
|
||||
"/a": nil,
|
||||
"/aa": {"aa"},
|
||||
"/aaa": {"aaa"},
|
||||
"/aaaa": nil,
|
||||
},
|
||||
"/{foo:[a-z]{3}}/{bar:[a-z]{2}}": {
|
||||
"/a": nil,
|
||||
"/ab": nil,
|
||||
"/abc": nil,
|
||||
"/abcd": nil,
|
||||
"/abc/ab": {"abc", "ab"},
|
||||
"/abc/abc": nil,
|
||||
"/abcd/ab": nil,
|
||||
},
|
||||
`/{foo:\w{3,}}/{bar:\d{2,}}`: {
|
||||
"/a": nil,
|
||||
"/ab": nil,
|
||||
"/abc": nil,
|
||||
"/abc/1": nil,
|
||||
"/abc/12": {"abc", "12"},
|
||||
"/abcd/12": {"abcd", "12"},
|
||||
"/abcd/123": {"abcd", "123"},
|
||||
},
|
||||
}
|
||||
|
||||
for pattern, paths := range tests {
|
||||
p, _ = newRouteRegexp(pattern, false, false, false, false)
|
||||
for path, result := range paths {
|
||||
matches = p.regexp.FindStringSubmatch(path)
|
||||
if result == nil {
|
||||
if matches != nil {
|
||||
t.Errorf("%v should not match %v.", pattern, path)
|
||||
}
|
||||
} else {
|
||||
if len(matches) != len(result)+1 {
|
||||
t.Errorf("Expected %v matches, got %v.", len(result)+1, len(matches))
|
||||
} else {
|
||||
for k, v := range result {
|
||||
if matches[k+1] != v {
|
||||
t.Errorf("Expected %v, got %v.", v, matches[k+1])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,276 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// newRouteRegexp parses a route template and returns a routeRegexp,
|
||||
// used to match a host, a path or a query string.
|
||||
//
|
||||
// It will extract named variables, assemble a regexp to be matched, create
|
||||
// a "reverse" template to build URLs and compile regexps to validate variable
|
||||
// values used in URL building.
|
||||
//
|
||||
// Previously we accepted only Python-like identifiers for variable
|
||||
// names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
|
||||
// name and pattern can't be empty, and names can't contain a colon.
|
||||
func newRouteRegexp(tpl string, matchHost, matchPrefix, matchQuery, strictSlash bool) (*routeRegexp, error) {
|
||||
// Check if it is well-formed.
|
||||
idxs, errBraces := braceIndices(tpl)
|
||||
if errBraces != nil {
|
||||
return nil, errBraces
|
||||
}
|
||||
// Backup the original.
|
||||
template := tpl
|
||||
// Now let's parse it.
|
||||
defaultPattern := "[^/]+"
|
||||
if matchQuery {
|
||||
defaultPattern = "[^?&]+"
|
||||
matchPrefix = true
|
||||
} else if matchHost {
|
||||
defaultPattern = "[^.]+"
|
||||
matchPrefix = false
|
||||
}
|
||||
// Only match strict slash if not matching
|
||||
if matchPrefix || matchHost || matchQuery {
|
||||
strictSlash = false
|
||||
}
|
||||
// Set a flag for strictSlash.
|
||||
endSlash := false
|
||||
if strictSlash && strings.HasSuffix(tpl, "/") {
|
||||
tpl = tpl[:len(tpl)-1]
|
||||
endSlash = true
|
||||
}
|
||||
varsN := make([]string, len(idxs)/2)
|
||||
varsR := make([]*regexp.Regexp, len(idxs)/2)
|
||||
pattern := bytes.NewBufferString("")
|
||||
if !matchQuery {
|
||||
pattern.WriteByte('^')
|
||||
}
|
||||
reverse := bytes.NewBufferString("")
|
||||
var end int
|
||||
var err error
|
||||
for i := 0; i < len(idxs); i += 2 {
|
||||
// Set all values we are interested in.
|
||||
raw := tpl[end:idxs[i]]
|
||||
end = idxs[i+1]
|
||||
parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
|
||||
name := parts[0]
|
||||
patt := defaultPattern
|
||||
if len(parts) == 2 {
|
||||
patt = parts[1]
|
||||
}
|
||||
// Name or pattern can't be empty.
|
||||
if name == "" || patt == "" {
|
||||
return nil, fmt.Errorf("mux: missing name or pattern in %q",
|
||||
tpl[idxs[i]:end])
|
||||
}
|
||||
// Build the regexp pattern.
|
||||
fmt.Fprintf(pattern, "%s(%s)", regexp.QuoteMeta(raw), patt)
|
||||
// Build the reverse template.
|
||||
fmt.Fprintf(reverse, "%s%%s", raw)
|
||||
// Append variable name and compiled pattern.
|
||||
varsN[i/2] = name
|
||||
varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// Add the remaining.
|
||||
raw := tpl[end:]
|
||||
pattern.WriteString(regexp.QuoteMeta(raw))
|
||||
if strictSlash {
|
||||
pattern.WriteString("[/]?")
|
||||
}
|
||||
if !matchPrefix {
|
||||
pattern.WriteByte('$')
|
||||
}
|
||||
reverse.WriteString(raw)
|
||||
if endSlash {
|
||||
reverse.WriteByte('/')
|
||||
}
|
||||
// Compile full regexp.
|
||||
reg, errCompile := regexp.Compile(pattern.String())
|
||||
if errCompile != nil {
|
||||
return nil, errCompile
|
||||
}
|
||||
// Done!
|
||||
return &routeRegexp{
|
||||
template: template,
|
||||
matchHost: matchHost,
|
||||
matchQuery: matchQuery,
|
||||
strictSlash: strictSlash,
|
||||
regexp: reg,
|
||||
reverse: reverse.String(),
|
||||
varsN: varsN,
|
||||
varsR: varsR,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// routeRegexp stores a regexp to match a host or path and information to
|
||||
// collect and validate route variables.
|
||||
type routeRegexp struct {
|
||||
// The unmodified template.
|
||||
template string
|
||||
// True for host match, false for path or query string match.
|
||||
matchHost bool
|
||||
// True for query string match, false for path and host match.
|
||||
matchQuery bool
|
||||
// The strictSlash value defined on the route, but disabled if PathPrefix was used.
|
||||
strictSlash bool
|
||||
// Expanded regexp.
|
||||
regexp *regexp.Regexp
|
||||
// Reverse template.
|
||||
reverse string
|
||||
// Variable names.
|
||||
varsN []string
|
||||
// Variable regexps (validators).
|
||||
varsR []*regexp.Regexp
|
||||
}
|
||||
|
||||
// Match matches the regexp against the URL host or path.
|
||||
func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
|
||||
if !r.matchHost {
|
||||
if r.matchQuery {
|
||||
return r.regexp.MatchString(req.URL.RawQuery)
|
||||
} else {
|
||||
return r.regexp.MatchString(req.URL.Path)
|
||||
}
|
||||
}
|
||||
return r.regexp.MatchString(getHost(req))
|
||||
}
|
||||
|
||||
// url builds a URL part using the given values.
|
||||
func (r *routeRegexp) url(pairs ...string) (string, error) {
|
||||
values, err := mapFromPairs(pairs...)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
urlValues := make([]interface{}, len(r.varsN))
|
||||
for k, v := range r.varsN {
|
||||
value, ok := values[v]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("mux: missing route variable %q", v)
|
||||
}
|
||||
urlValues[k] = value
|
||||
}
|
||||
rv := fmt.Sprintf(r.reverse, urlValues...)
|
||||
if !r.regexp.MatchString(rv) {
|
||||
// The URL is checked against the full regexp, instead of checking
|
||||
// individual variables. This is faster but to provide a good error
|
||||
// message, we check individual regexps if the URL doesn't match.
|
||||
for k, v := range r.varsN {
|
||||
if !r.varsR[k].MatchString(values[v]) {
|
||||
return "", fmt.Errorf(
|
||||
"mux: variable %q doesn't match, expected %q", values[v],
|
||||
r.varsR[k].String())
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// braceIndices returns the first level curly brace indices from a string.
|
||||
// It returns an error in case of unbalanced braces.
|
||||
func braceIndices(s string) ([]int, error) {
|
||||
var level, idx int
|
||||
idxs := make([]int, 0)
|
||||
for i := 0; i < len(s); i++ {
|
||||
switch s[i] {
|
||||
case '{':
|
||||
if level++; level == 1 {
|
||||
idx = i
|
||||
}
|
||||
case '}':
|
||||
if level--; level == 0 {
|
||||
idxs = append(idxs, idx, i+1)
|
||||
} else if level < 0 {
|
||||
return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
if level != 0 {
|
||||
return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
|
||||
}
|
||||
return idxs, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// routeRegexpGroup
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// routeRegexpGroup groups the route matchers that carry variables.
|
||||
type routeRegexpGroup struct {
|
||||
host *routeRegexp
|
||||
path *routeRegexp
|
||||
queries []*routeRegexp
|
||||
}
|
||||
|
||||
// setMatch extracts the variables from the URL once a route matches.
|
||||
func (v *routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
|
||||
// Store host variables.
|
||||
if v.host != nil {
|
||||
hostVars := v.host.regexp.FindStringSubmatch(getHost(req))
|
||||
if hostVars != nil {
|
||||
for k, v := range v.host.varsN {
|
||||
m.Vars[v] = hostVars[k+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
// Store path variables.
|
||||
if v.path != nil {
|
||||
pathVars := v.path.regexp.FindStringSubmatch(req.URL.Path)
|
||||
if pathVars != nil {
|
||||
for k, v := range v.path.varsN {
|
||||
m.Vars[v] = pathVars[k+1]
|
||||
}
|
||||
// Check if we should redirect.
|
||||
if v.path.strictSlash {
|
||||
p1 := strings.HasSuffix(req.URL.Path, "/")
|
||||
p2 := strings.HasSuffix(v.path.template, "/")
|
||||
if p1 != p2 {
|
||||
u, _ := url.Parse(req.URL.String())
|
||||
if p1 {
|
||||
u.Path = u.Path[:len(u.Path)-1]
|
||||
} else {
|
||||
u.Path += "/"
|
||||
}
|
||||
m.Handler = http.RedirectHandler(u.String(), 301)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Store query string variables.
|
||||
rawQuery := req.URL.RawQuery
|
||||
for _, q := range v.queries {
|
||||
queryVars := q.regexp.FindStringSubmatch(rawQuery)
|
||||
if queryVars != nil {
|
||||
for k, v := range q.varsN {
|
||||
m.Vars[v] = queryVars[k+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getHost tries its best to return the request host.
|
||||
func getHost(r *http.Request) string {
|
||||
if r.URL.IsAbs() {
|
||||
return r.URL.Host
|
||||
}
|
||||
host := r.Host
|
||||
// Slice off any port information.
|
||||
if i := strings.Index(host, ":"); i != -1 {
|
||||
host = host[:i]
|
||||
}
|
||||
return host
|
||||
|
||||
}
|
|
@ -1,524 +0,0 @@
|
|||
// Copyright 2012 The Gorilla Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package mux
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Route stores information to match a request and build URLs.
|
||||
type Route struct {
|
||||
// Parent where the route was registered (a Router).
|
||||
parent parentRoute
|
||||
// Request handler for the route.
|
||||
handler http.Handler
|
||||
// List of matchers.
|
||||
matchers []matcher
|
||||
// Manager for the variables from host and path.
|
||||
regexp *routeRegexpGroup
|
||||
// If true, when the path pattern is "/path/", accessing "/path" will
|
||||
// redirect to the former and vice versa.
|
||||
strictSlash bool
|
||||
// If true, this route never matches: it is only used to build URLs.
|
||||
buildOnly bool
|
||||
// The name used to build URLs.
|
||||
name string
|
||||
// Error resulted from building a route.
|
||||
err error
|
||||
}
|
||||
|
||||
// Match matches the route against the request.
|
||||
func (r *Route) Match(req *http.Request, match *RouteMatch) bool {
|
||||
if r.buildOnly || r.err != nil {
|
||||
return false
|
||||
}
|
||||
// Match everything.
|
||||
for _, m := range r.matchers {
|
||||
if matched := m.Match(req, match); !matched {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Yay, we have a match. Let's collect some info about it.
|
||||
if match.Route == nil {
|
||||
match.Route = r
|
||||
}
|
||||
if match.Handler == nil {
|
||||
match.Handler = r.handler
|
||||
}
|
||||
if match.Vars == nil {
|
||||
match.Vars = make(map[string]string)
|
||||
}
|
||||
// Set variables.
|
||||
if r.regexp != nil {
|
||||
r.regexp.setMatch(req, match, r)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Route attributes
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// GetError returns an error resulted from building the route, if any.
|
||||
func (r *Route) GetError() error {
|
||||
return r.err
|
||||
}
|
||||
|
||||
// BuildOnly sets the route to never match: it is only used to build URLs.
|
||||
func (r *Route) BuildOnly() *Route {
|
||||
r.buildOnly = true
|
||||
return r
|
||||
}
|
||||
|
||||
// Handler --------------------------------------------------------------------
|
||||
|
||||
// Handler sets a handler for the route.
|
||||
func (r *Route) Handler(handler http.Handler) *Route {
|
||||
if r.err == nil {
|
||||
r.handler = handler
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// HandlerFunc sets a handler function for the route.
|
||||
func (r *Route) HandlerFunc(f func(http.ResponseWriter, *http.Request)) *Route {
|
||||
return r.Handler(http.HandlerFunc(f))
|
||||
}
|
||||
|
||||
// GetHandler returns the handler for the route, if any.
|
||||
func (r *Route) GetHandler() http.Handler {
|
||||
return r.handler
|
||||
}
|
||||
|
||||
// Name -----------------------------------------------------------------------
|
||||
|
||||
// Name sets the name for the route, used to build URLs.
|
||||
// If the name was registered already it will be overwritten.
|
||||
func (r *Route) Name(name string) *Route {
|
||||
if r.name != "" {
|
||||
r.err = fmt.Errorf("mux: route already has name %q, can't set %q",
|
||||
r.name, name)
|
||||
}
|
||||
if r.err == nil {
|
||||
r.name = name
|
||||
r.getNamedRoutes()[name] = r
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// GetName returns the name for the route, if any.
|
||||
func (r *Route) GetName() string {
|
||||
return r.name
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Matchers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// matcher types try to match a request.
|
||||
type matcher interface {
|
||||
Match(*http.Request, *RouteMatch) bool
|
||||
}
|
||||
|
||||
// addMatcher adds a matcher to the route.
|
||||
func (r *Route) addMatcher(m matcher) *Route {
|
||||
if r.err == nil {
|
||||
r.matchers = append(r.matchers, m)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// addRegexpMatcher adds a host or path matcher and builder to a route.
|
||||
func (r *Route) addRegexpMatcher(tpl string, matchHost, matchPrefix, matchQuery bool) error {
|
||||
if r.err != nil {
|
||||
return r.err
|
||||
}
|
||||
r.regexp = r.getRegexpGroup()
|
||||
if !matchHost && !matchQuery {
|
||||
if len(tpl) == 0 || tpl[0] != '/' {
|
||||
return fmt.Errorf("mux: path must start with a slash, got %q", tpl)
|
||||
}
|
||||
if r.regexp.path != nil {
|
||||
tpl = strings.TrimRight(r.regexp.path.template, "/") + tpl
|
||||
}
|
||||
}
|
||||
rr, err := newRouteRegexp(tpl, matchHost, matchPrefix, matchQuery, r.strictSlash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, q := range r.regexp.queries {
|
||||
if err = uniqueVars(rr.varsN, q.varsN); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if matchHost {
|
||||
if r.regexp.path != nil {
|
||||
if err = uniqueVars(rr.varsN, r.regexp.path.varsN); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
r.regexp.host = rr
|
||||
} else {
|
||||
if r.regexp.host != nil {
|
||||
if err = uniqueVars(rr.varsN, r.regexp.host.varsN); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if matchQuery {
|
||||
r.regexp.queries = append(r.regexp.queries, rr)
|
||||
} else {
|
||||
r.regexp.path = rr
|
||||
}
|
||||
}
|
||||
r.addMatcher(rr)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Headers --------------------------------------------------------------------
|
||||
|
||||
// headerMatcher matches the request against header values.
|
||||
type headerMatcher map[string]string
|
||||
|
||||
func (m headerMatcher) Match(r *http.Request, match *RouteMatch) bool {
|
||||
return matchMap(m, r.Header, true)
|
||||
}
|
||||
|
||||
// Headers adds a matcher for request header values.
|
||||
// It accepts a sequence of key/value pairs to be matched. For example:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.Headers("Content-Type", "application/json",
|
||||
// "X-Requested-With", "XMLHttpRequest")
|
||||
//
|
||||
// The above route will only match if both request header values match.
|
||||
//
|
||||
// It the value is an empty string, it will match any value if the key is set.
|
||||
func (r *Route) Headers(pairs ...string) *Route {
|
||||
if r.err == nil {
|
||||
var headers map[string]string
|
||||
headers, r.err = mapFromPairs(pairs...)
|
||||
return r.addMatcher(headerMatcher(headers))
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Host -----------------------------------------------------------------------
|
||||
|
||||
// Host adds a matcher for the URL host.
|
||||
// It accepts a template with zero or more URL variables enclosed by {}.
|
||||
// Variables can define an optional regexp pattern to me matched:
|
||||
//
|
||||
// - {name} matches anything until the next dot.
|
||||
//
|
||||
// - {name:pattern} matches the given regexp pattern.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.Host("www.domain.com")
|
||||
// r.Host("{subdomain}.domain.com")
|
||||
// r.Host("{subdomain:[a-z]+}.domain.com")
|
||||
//
|
||||
// Variable names must be unique in a given route. They can be retrieved
|
||||
// calling mux.Vars(request).
|
||||
func (r *Route) Host(tpl string) *Route {
|
||||
r.err = r.addRegexpMatcher(tpl, true, false, false)
|
||||
return r
|
||||
}
|
||||
|
||||
// MatcherFunc ----------------------------------------------------------------
|
||||
|
||||
// MatcherFunc is the function signature used by custom matchers.
|
||||
type MatcherFunc func(*http.Request, *RouteMatch) bool
|
||||
|
||||
func (m MatcherFunc) Match(r *http.Request, match *RouteMatch) bool {
|
||||
return m(r, match)
|
||||
}
|
||||
|
||||
// MatcherFunc adds a custom function to be used as request matcher.
|
||||
func (r *Route) MatcherFunc(f MatcherFunc) *Route {
|
||||
return r.addMatcher(f)
|
||||
}
|
||||
|
||||
// Methods --------------------------------------------------------------------
|
||||
|
||||
// methodMatcher matches the request against HTTP methods.
|
||||
type methodMatcher []string
|
||||
|
||||
func (m methodMatcher) Match(r *http.Request, match *RouteMatch) bool {
|
||||
return matchInArray(m, r.Method)
|
||||
}
|
||||
|
||||
// Methods adds a matcher for HTTP methods.
|
||||
// It accepts a sequence of one or more methods to be matched, e.g.:
|
||||
// "GET", "POST", "PUT".
|
||||
func (r *Route) Methods(methods ...string) *Route {
|
||||
for k, v := range methods {
|
||||
methods[k] = strings.ToUpper(v)
|
||||
}
|
||||
return r.addMatcher(methodMatcher(methods))
|
||||
}
|
||||
|
||||
// Path -----------------------------------------------------------------------
|
||||
|
||||
// Path adds a matcher for the URL path.
|
||||
// It accepts a template with zero or more URL variables enclosed by {}. The
|
||||
// template must start with a "/".
|
||||
// Variables can define an optional regexp pattern to me matched:
|
||||
//
|
||||
// - {name} matches anything until the next slash.
|
||||
//
|
||||
// - {name:pattern} matches the given regexp pattern.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.Path("/products/").Handler(ProductsHandler)
|
||||
// r.Path("/products/{key}").Handler(ProductsHandler)
|
||||
// r.Path("/articles/{category}/{id:[0-9]+}").
|
||||
// Handler(ArticleHandler)
|
||||
//
|
||||
// Variable names must be unique in a given route. They can be retrieved
|
||||
// calling mux.Vars(request).
|
||||
func (r *Route) Path(tpl string) *Route {
|
||||
r.err = r.addRegexpMatcher(tpl, false, false, false)
|
||||
return r
|
||||
}
|
||||
|
||||
// PathPrefix -----------------------------------------------------------------
|
||||
|
||||
// PathPrefix adds a matcher for the URL path prefix. This matches if the given
|
||||
// template is a prefix of the full URL path. See Route.Path() for details on
|
||||
// the tpl argument.
|
||||
//
|
||||
// Note that it does not treat slashes specially ("/foobar/" will be matched by
|
||||
// the prefix "/foo") so you may want to use a trailing slash here.
|
||||
//
|
||||
// Also note that the setting of Router.StrictSlash() has no effect on routes
|
||||
// with a PathPrefix matcher.
|
||||
func (r *Route) PathPrefix(tpl string) *Route {
|
||||
r.err = r.addRegexpMatcher(tpl, false, true, false)
|
||||
return r
|
||||
}
|
||||
|
||||
// Query ----------------------------------------------------------------------
|
||||
|
||||
// Queries adds a matcher for URL query values.
|
||||
// It accepts a sequence of key/value pairs. Values may define variables.
|
||||
// For example:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.Queries("foo", "bar", "id", "{id:[0-9]+}")
|
||||
//
|
||||
// The above route will only match if the URL contains the defined queries
|
||||
// values, e.g.: ?foo=bar&id=42.
|
||||
//
|
||||
// It the value is an empty string, it will match any value if the key is set.
|
||||
//
|
||||
// Variables can define an optional regexp pattern to me matched:
|
||||
//
|
||||
// - {name} matches anything until the next slash.
|
||||
//
|
||||
// - {name:pattern} matches the given regexp pattern.
|
||||
func (r *Route) Queries(pairs ...string) *Route {
|
||||
length := len(pairs)
|
||||
if length%2 != 0 {
|
||||
r.err = fmt.Errorf(
|
||||
"mux: number of parameters must be multiple of 2, got %v", pairs)
|
||||
return nil
|
||||
}
|
||||
for i := 0; i < length; i += 2 {
|
||||
if r.err = r.addRegexpMatcher(pairs[i]+"="+pairs[i+1], false, true, true); r.err != nil {
|
||||
return r
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// Schemes --------------------------------------------------------------------
|
||||
|
||||
// schemeMatcher matches the request against URL schemes.
|
||||
type schemeMatcher []string
|
||||
|
||||
func (m schemeMatcher) Match(r *http.Request, match *RouteMatch) bool {
|
||||
return matchInArray(m, r.URL.Scheme)
|
||||
}
|
||||
|
||||
// Schemes adds a matcher for URL schemes.
|
||||
// It accepts a sequence of schemes to be matched, e.g.: "http", "https".
|
||||
func (r *Route) Schemes(schemes ...string) *Route {
|
||||
for k, v := range schemes {
|
||||
schemes[k] = strings.ToLower(v)
|
||||
}
|
||||
return r.addMatcher(schemeMatcher(schemes))
|
||||
}
|
||||
|
||||
// Subrouter ------------------------------------------------------------------
|
||||
|
||||
// Subrouter creates a subrouter for the route.
|
||||
//
|
||||
// It will test the inner routes only if the parent route matched. For example:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// s := r.Host("www.domain.com").Subrouter()
|
||||
// s.HandleFunc("/products/", ProductsHandler)
|
||||
// s.HandleFunc("/products/{key}", ProductHandler)
|
||||
// s.HandleFunc("/articles/{category}/{id:[0-9]+}"), ArticleHandler)
|
||||
//
|
||||
// Here, the routes registered in the subrouter won't be tested if the host
|
||||
// doesn't match.
|
||||
func (r *Route) Subrouter() *Router {
|
||||
router := &Router{parent: r, strictSlash: r.strictSlash}
|
||||
r.addMatcher(router)
|
||||
return router
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// URL building
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// URL builds a URL for the route.
|
||||
//
|
||||
// It accepts a sequence of key/value pairs for the route variables. For
|
||||
// example, given this route:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
|
||||
// Name("article")
|
||||
//
|
||||
// ...a URL for it can be built using:
|
||||
//
|
||||
// url, err := r.Get("article").URL("category", "technology", "id", "42")
|
||||
//
|
||||
// ...which will return an url.URL with the following path:
|
||||
//
|
||||
// "/articles/technology/42"
|
||||
//
|
||||
// This also works for host variables:
|
||||
//
|
||||
// r := mux.NewRouter()
|
||||
// r.Host("{subdomain}.domain.com").
|
||||
// HandleFunc("/articles/{category}/{id:[0-9]+}", ArticleHandler).
|
||||
// Name("article")
|
||||
//
|
||||
// // url.String() will be "http://news.domain.com/articles/technology/42"
|
||||
// url, err := r.Get("article").URL("subdomain", "news",
|
||||
// "category", "technology",
|
||||
// "id", "42")
|
||||
//
|
||||
// All variables defined in the route are required, and their values must
|
||||
// conform to the corresponding patterns.
|
||||
func (r *Route) URL(pairs ...string) (*url.URL, error) {
|
||||
if r.err != nil {
|
||||
return nil, r.err
|
||||
}
|
||||
if r.regexp == nil {
|
||||
return nil, errors.New("mux: route doesn't have a host or path")
|
||||
}
|
||||
var scheme, host, path string
|
||||
var err error
|
||||
if r.regexp.host != nil {
|
||||
// Set a default scheme.
|
||||
scheme = "http"
|
||||
if host, err = r.regexp.host.url(pairs...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if r.regexp.path != nil {
|
||||
if path, err = r.regexp.path.url(pairs...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &url.URL{
|
||||
Scheme: scheme,
|
||||
Host: host,
|
||||
Path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// URLHost builds the host part of the URL for a route. See Route.URL().
|
||||
//
|
||||
// The route must have a host defined.
|
||||
func (r *Route) URLHost(pairs ...string) (*url.URL, error) {
|
||||
if r.err != nil {
|
||||
return nil, r.err
|
||||
}
|
||||
if r.regexp == nil || r.regexp.host == nil {
|
||||
return nil, errors.New("mux: route doesn't have a host")
|
||||
}
|
||||
host, err := r.regexp.host.url(pairs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &url.URL{
|
||||
Scheme: "http",
|
||||
Host: host,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// URLPath builds the path part of the URL for a route. See Route.URL().
|
||||
//
|
||||
// The route must have a path defined.
|
||||
func (r *Route) URLPath(pairs ...string) (*url.URL, error) {
|
||||
if r.err != nil {
|
||||
return nil, r.err
|
||||
}
|
||||
if r.regexp == nil || r.regexp.path == nil {
|
||||
return nil, errors.New("mux: route doesn't have a path")
|
||||
}
|
||||
path, err := r.regexp.path.url(pairs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &url.URL{
|
||||
Path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// parentRoute
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// parentRoute allows routes to know about parent host and path definitions.
|
||||
type parentRoute interface {
|
||||
getNamedRoutes() map[string]*Route
|
||||
getRegexpGroup() *routeRegexpGroup
|
||||
}
|
||||
|
||||
// getNamedRoutes returns the map where named routes are registered.
|
||||
func (r *Route) getNamedRoutes() map[string]*Route {
|
||||
if r.parent == nil {
|
||||
// During tests router is not always set.
|
||||
r.parent = NewRouter()
|
||||
}
|
||||
return r.parent.getNamedRoutes()
|
||||
}
|
||||
|
||||
// getRegexpGroup returns regexp definitions from this route.
|
||||
func (r *Route) getRegexpGroup() *routeRegexpGroup {
|
||||
if r.regexp == nil {
|
||||
if r.parent == nil {
|
||||
// During tests router is not always set.
|
||||
r.parent = NewRouter()
|
||||
}
|
||||
regexp := r.parent.getRegexpGroup()
|
||||
if regexp == nil {
|
||||
r.regexp = new(routeRegexpGroup)
|
||||
} else {
|
||||
// Copy.
|
||||
r.regexp = &routeRegexpGroup{
|
||||
host: regexp.host,
|
||||
path: regexp.path,
|
||||
queries: regexp.queries,
|
||||
}
|
||||
}
|
||||
}
|
||||
return r.regexp
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*~
|
||||
*.dSYM
|
||||
*.syso
|
|
@ -1,80 +0,0 @@
|
|||
## Ubuntu (Kylin) 14.04
|
||||
### Build Dependencies
|
||||
This installation document assumes Ubuntu 14.04 or later on x86-64 platform.
|
||||
|
||||
##### Install YASM
|
||||
|
||||
Erasure depends on Intel ISAL library, ISAL uses Intel AVX2 processor instructions, to compile these files one needs to install ``yasm`` which supports AVX2 instructions. AVX2 support only ended in ``yasm`` from version ``1.2.0``, any version below ``1.2.0`` will throw a build error.
|
||||
|
||||
```sh
|
||||
$ sudo apt-get install yasm
|
||||
```
|
||||
|
||||
##### Install Go 1.4+
|
||||
Download Go 1.4+ from [https://golang.org/dl/](https://golang.org/dl/) and extract it into ``${HOME}/local`` and setup ``${HOME}/mygo`` as your project workspace folder.
|
||||
For example:
|
||||
```sh
|
||||
.... Extract and install golang ....
|
||||
|
||||
$ wget https://storage.googleapis.com/golang/go1.4.linux-amd64.tar.gz
|
||||
$ mkdir -p ${HOME}/local
|
||||
$ mkdir -p $HOME/mygo
|
||||
$ tar -C ${HOME}/local -xzf go1.4.linux-amd64.tar.gz
|
||||
|
||||
.... Export necessary environment variables ....
|
||||
|
||||
$ export PATH=$PATH:${HOME}/local/go/bin
|
||||
$ export GOROOT=${HOME}/local/go
|
||||
$ export GOPATH=$HOME/mygo
|
||||
$ export PATH=$PATH:$GOPATH/bin
|
||||
|
||||
.... Add paths to your bashrc ....
|
||||
|
||||
$ echo "export PATH=$PATH:${HOME}/local/go/bin" >> ${HOME}/.bashrc
|
||||
$ echo "export GOROOT=${HOME}/local/go" >> ${HOME}/.bashrc
|
||||
$ echo "export GOPATH=$HOME/mygo" >> ${HOME}/.bashrc
|
||||
$ echo "export PATH=$PATH:$GOPATH/bin" >> ${HOME}/.bashrc
|
||||
```
|
||||
|
||||
## Mac OSX (Yosemite) 10.10
|
||||
### Build Dependencies
|
||||
This installation document assumes Mac OSX Yosemite 10.10 or later on x86-64 platform.
|
||||
|
||||
##### Install brew
|
||||
```sh
|
||||
$ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
|
||||
```
|
||||
|
||||
##### Install Git
|
||||
```sh
|
||||
$ brew install git
|
||||
```
|
||||
|
||||
##### Install YASM
|
||||
|
||||
Erasure depends on Intel ISAL library, ISAL uses Intel AVX2 processor instructions, to compile these files one needs to install ``yasm`` which supports AVX2 instructions. AVX2 support only ended in ``yasm`` from version ``1.2.0``, any version below ``1.2.0`` will throw a build error.
|
||||
|
||||
```sh
|
||||
$ brew install yasm
|
||||
```
|
||||
|
||||
##### Install Go 1.4+
|
||||
On MacOSX ``brew.sh`` is the best way to install golang
|
||||
|
||||
For example:
|
||||
```sh
|
||||
.... Install golang using `brew` ....
|
||||
|
||||
$ brew install go
|
||||
$ mkdir -p $HOME/mygo
|
||||
|
||||
.... Export necessary environment variables ....
|
||||
|
||||
$ export GOPATH=$HOME/mygo
|
||||
$ export PATH=$PATH:$GOPATH/bin
|
||||
|
||||
.... Add paths to your bashrc ....
|
||||
|
||||
$ echo "export GOPATH=$HOME/mygo" >> ${HOME}/.bashrc
|
||||
$ echo "export PATH=$PATH:$GOPATH/bin" >> ${HOME}/.bashrc
|
||||
```
|
|
@ -1,30 +0,0 @@
|
|||
### Setup your Erasure Github Repository
|
||||
Fork [Erasure upstream](https://github.com/minio-io/erasure/fork) source repository to your own personal repository. Copy the URL and pass it to ``go get`` command. Go uses git to clone a copy into your project workspace folder.
|
||||
```sh
|
||||
$ git clone https://github.com/$USER_ID/erasure
|
||||
$ cd erasure
|
||||
$ mkdir -p ${GOPATH}/src/github.com/minio-io
|
||||
$ ln -s ${PWD} $GOPATH/src/github.com/minio-io/
|
||||
```
|
||||
|
||||
### Compiling Erasure from source
|
||||
```sh
|
||||
$ go generate
|
||||
$ go build
|
||||
```
|
||||
### Developer Guidelines
|
||||
To make the process as seamless as possible, we ask for the following:
|
||||
* Go ahead and fork the project and make your changes. We encourage pull requests to discuss code changes.
|
||||
- Fork it
|
||||
- Create your feature branch (git checkout -b my-new-feature)
|
||||
- Commit your changes (git commit -am 'Add some feature')
|
||||
- Push to the branch (git push origin my-new-feature)
|
||||
- Create new Pull Request
|
||||
* When you're ready to create a pull request, be sure to:
|
||||
- Have test cases for the new code. If you have questions about how to do it, please ask in your pull request.
|
||||
- Run `go fmt`
|
||||
- Squash your commits into a single commit. `git rebase -i`. It's okay to force update your pull request.
|
||||
- Make sure `go test -race ./...` and `go build` completes.
|
||||
* Read [Effective Go](https://github.com/golang/go/wiki/CodeReviewComments) article from Golang project
|
||||
- `Erasure` project is strictly conformant with Golang style
|
||||
- if you happen to observe offending code, please feel free to send a pull request
|
|
@ -1,26 +0,0 @@
|
|||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,202 +0,0 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,25 +0,0 @@
|
|||
## Introduction
|
||||
|
||||
Erasure is an open source Golang library written on top of ISAL (Intel Intelligent Storage Library) released under [Apache license v2](./LICENSE)
|
||||
|
||||
### Developers
|
||||
* [Get Source](./CONTRIBUTING.md)
|
||||
* [Build Dependencies](./BUILDDEPS.md)
|
||||
* [Development Workflow](./CONTRIBUTING.md#developer-guidelines)
|
||||
* [Developer discussions and bugs](https://github.com/Minio-io/erasure/issues)
|
||||
|
||||
### Supported platforms
|
||||
|
||||
| Name | Supported |
|
||||
| ------------- | ------------- |
|
||||
| Linux | Yes |
|
||||
| Windows | Not yet |
|
||||
| Mac OSX | Yes |
|
||||
|
||||
### Supported architectures
|
||||
|
||||
| Arch | Supported |
|
||||
| ------------- | ------------- |
|
||||
| x86-64 | Yes |
|
||||
| arm64 | Not yet|
|
||||
| i386 | Never |
|
|
@ -1,49 +0,0 @@
|
|||
================================================================================
|
||||
v2.10 Intel Intelligent Storage Acceleration Library Release Notes
|
||||
Open Source Version
|
||||
================================================================================
|
||||
|
||||
================================================================================
|
||||
RELEASE NOTE CONTENTS
|
||||
================================================================================
|
||||
1. KNOWN ISSUES
|
||||
2. FIXED ISSUES
|
||||
3. CHANGE LOG & FEATURES ADDED
|
||||
|
||||
================================================================================
|
||||
1. KNOWN ISSUES
|
||||
================================================================================
|
||||
|
||||
* Only erasure code unit included in open source version at this time.
|
||||
|
||||
* Perf tests do not run in Windows environment.
|
||||
|
||||
* Leaving <unit>/bin directories from builds in unit directories will cause the
|
||||
top-level make build to fail. Build only in top-level or ensure unit
|
||||
directories are clean of objects and /bin.
|
||||
|
||||
* 32-bit lib is not supported in Windows.
|
||||
|
||||
================================================================================
|
||||
2. FIXED ISSUES
|
||||
================================================================================
|
||||
v2.10
|
||||
|
||||
* Fix for windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only
|
||||
affects windows versions of erasure code. GP register saves/restore were
|
||||
pushed to same stack area as XMM.
|
||||
|
||||
================================================================================
|
||||
3. CHANGE LOG & FEATURES ADDED
|
||||
================================================================================
|
||||
v2.10
|
||||
|
||||
* Erasure code updates
|
||||
- New AVX and AVX2 support functions.
|
||||
- Changes min len requirement on gf_vect_dot_prod() to 32 from 16.
|
||||
- Tests include both source and parity recovery with ec_encode_data().
|
||||
- New encoding examples with Vandermonde or Cauchy matrix.
|
||||
|
||||
v2.8
|
||||
|
||||
* First open release of erasure code unit that is part of ISA-L.
|
|
@ -1,3 +0,0 @@
|
|||
v1.0 - Erasure Golang Package
|
||||
============================
|
||||
- First release, supports only amd64 or x86-64 architecture
|
|
@ -1,71 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
. "gopkg.in/check.v1"
|
||||
)
|
||||
|
||||
type MySuite struct{}
|
||||
|
||||
var _ = Suite(&MySuite{})
|
||||
|
||||
func Test(t *testing.T) { TestingT(t) }
|
||||
|
||||
const (
|
||||
k = 10
|
||||
m = 5
|
||||
)
|
||||
|
||||
func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) {
|
||||
ep, _ := ValidateParams(k, m, Cauchy)
|
||||
|
||||
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
|
||||
|
||||
e := NewErasure(ep)
|
||||
chunks, err := e.Encode(data)
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
errorIndex := []int{0, 3, 5, 9, 11, 13}
|
||||
chunks = corruptChunks(chunks, errorIndex)
|
||||
|
||||
_, err = e.Decode(chunks, len(data))
|
||||
c.Assert(err, Not(IsNil))
|
||||
}
|
||||
|
||||
func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) {
|
||||
ep, _ := ValidateParams(k, m, Cauchy)
|
||||
|
||||
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
|
||||
|
||||
e := NewErasure(ep)
|
||||
chunks, err := e.Encode(data)
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
errorIndex := []int{0, 3, 5, 9, 13}
|
||||
chunks = corruptChunks(chunks, errorIndex)
|
||||
|
||||
recoveredData, err := e.Decode(chunks, len(data))
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
if !bytes.Equal(data, recoveredData) {
|
||||
c.Fatalf("Recovered data mismatches with original data")
|
||||
}
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
// #include <stdint.h>
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// intSlice2CIntArray converts Go int slice to C int array
|
||||
func intSlice2CIntArray(srcErrList []int) *C.int32_t {
|
||||
var sizeErrInt = int(unsafe.Sizeof(srcErrList[0]))
|
||||
switch sizeInt {
|
||||
case sizeErrInt:
|
||||
return (*C.int32_t)(unsafe.Pointer(&srcErrList[0]))
|
||||
case sizeInt8:
|
||||
int8Array := make([]int8, len(srcErrList))
|
||||
for i, v := range srcErrList {
|
||||
int8Array[i] = int8(v)
|
||||
}
|
||||
return (*C.int32_t)(unsafe.Pointer(&int8Array[0]))
|
||||
case sizeInt16:
|
||||
int16Array := make([]int16, len(srcErrList))
|
||||
for i, v := range srcErrList {
|
||||
int16Array[i] = int16(v)
|
||||
}
|
||||
return (*C.int32_t)(unsafe.Pointer(&int16Array[0]))
|
||||
case sizeInt32:
|
||||
int32Array := make([]int32, len(srcErrList))
|
||||
for i, v := range srcErrList {
|
||||
int32Array[i] = int32(v)
|
||||
}
|
||||
return (*C.int32_t)(unsafe.Pointer(&int32Array[0]))
|
||||
case sizeInt64:
|
||||
int64Array := make([]int64, len(srcErrList))
|
||||
for i, v := range srcErrList {
|
||||
int64Array[i] = int64(v)
|
||||
}
|
||||
return (*C.int32_t)(unsafe.Pointer(&int64Array[0]))
|
||||
default:
|
||||
panic(fmt.Sprintf("Unsupported: %d", sizeInt))
|
||||
}
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
// Package erasure is a Go wrapper for the Intel Intelligent Storage
|
||||
// Acceleration Library (Intel ISA-L). Intel ISA-L is a CPU optimized
|
||||
// implementation of erasure coding algorithms.
|
||||
//
|
||||
// For more information on Intel ISA-L, please visit:
|
||||
// https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// Encode encodes a block of data. The input is the original data. The output
|
||||
// is a 2 tuple containing (k + m) chunks of erasure encoded data and the
|
||||
// length of the original object.
|
||||
//
|
||||
// Decode decodes 2 tuple data containing (k + m) chunks back into its original form.
|
||||
// Additionally original block length should also be provided as input.
|
||||
//
|
||||
// Decoded data is exactly similar in length and content as the original data.
|
||||
//
|
||||
// Encoding data may be performed in 3 steps.
|
||||
//
|
||||
// 1. Create a parse set of encoder parameters
|
||||
// 2. Create a new encoder
|
||||
// 3. Encode data
|
||||
//
|
||||
// Decoding data is also performed in 3 steps.
|
||||
//
|
||||
// 1. Create a parse set of encoder parameters for validation
|
||||
// 2. Create a new encoder
|
||||
// 3. Decode data
|
||||
//
|
||||
// Erasure parameters contain three configurable elements:
|
||||
// ValidateParams(k, m, technique int) (ErasureParams, error)
|
||||
// k - Number of rows in matrix
|
||||
// m - Number of colums in matrix
|
||||
// technique - Matrix type, can be either Cauchy (recommended) or Vandermonde
|
||||
// constraints: k + m < Galois Field (2^8)
|
||||
//
|
||||
// Choosing right parity and matrix technique is left for application to decide.
|
||||
//
|
||||
// But here are the few points to keep in mind
|
||||
//
|
||||
// Techniques:
|
||||
// - Vandermonde is most commonly used method for choosing coefficients in erasure
|
||||
// encoding but does not guarantee invertable for every sub matrix.
|
||||
// Users may want to adjust for k > 5. (k is data blocks)
|
||||
// - Whereas Cauchy is our recommended method for choosing coefficients in erasure coding.
|
||||
// Since any sub-matrix of a Cauchy matrix is invertable.
|
||||
//
|
||||
// Total blocks:
|
||||
// - Data blocks and Parity blocks should not be greater than 'Galois Field' (2^8)
|
||||
//
|
||||
// Example
|
||||
//
|
||||
// Creating and using an encoder
|
||||
// var bytes []byte
|
||||
// params := erasure.ValidateParams(10, 5, erasure.Cauchy)
|
||||
// encoder := erasure.NewErasure(params)
|
||||
// encodedData, length := encoder.Encode(bytes)
|
||||
//
|
||||
// Creating and using a decoder
|
||||
// var encodedData [][]byte
|
||||
// var length int
|
||||
// params := erasure.ValidateParams(10, 5, erasure.Cauchy)
|
||||
// encoder := erasure.NewErasure(params)
|
||||
// originalData, err := encoder.Decode(encodedData, length)
|
||||
//
|
||||
package erasure
|
5429
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/docs/isa-l_open_src_2.10.pdf
generated
vendored
5429
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/docs/isa-l_open_src_2.10.pdf
generated
vendored
File diff suppressed because it is too large
Load Diff
BIN
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/docs/isa-l_open_src_2.13.pdf
generated
vendored
BIN
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/docs/isa-l_open_src_2.13.pdf
generated
vendored
Binary file not shown.
|
@ -1,348 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h> // for memset
|
||||
#include "ec_code.h"
|
||||
#include "ec_base.h" // for GF tables
|
||||
#include "ec_types.h"
|
||||
|
||||
unsigned char gf_mul(unsigned char a, unsigned char b)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
int i;
|
||||
|
||||
if ((a == 0) || (b == 0))
|
||||
return 0;
|
||||
|
||||
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
|
||||
#else
|
||||
return gf_mul_table_base[b * 256 + a];
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned char gf_inv(unsigned char a)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
if (a == 0)
|
||||
return 0;
|
||||
|
||||
return gff_base[255 - gflog_base[a]];
|
||||
#else
|
||||
return gf_inv_table_base[a];
|
||||
#endif
|
||||
}
|
||||
|
||||
void gf_gen_rs_matrix(unsigned char *a, int m, int k)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char p, gen = 1;
|
||||
|
||||
memset(a, 0, k * m);
|
||||
for (i = 0; i < k; i++)
|
||||
a[k * i + i] = 1;
|
||||
|
||||
for (i = k; i < m; i++) {
|
||||
p = 1;
|
||||
for (j = 0; j < k; j++) {
|
||||
a[k * i + j] = p;
|
||||
p = gf_mul(p, gen);
|
||||
}
|
||||
gen = gf_mul(gen, 2);
|
||||
}
|
||||
}
|
||||
|
||||
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *p;
|
||||
|
||||
// Identity matrix in high position
|
||||
memset(a, 0, k * m);
|
||||
for (i = 0; i < k; i++)
|
||||
a[k * i + i] = 1;
|
||||
|
||||
// For the rest choose 1/(i + j) | i != j
|
||||
p = &a[k * k];
|
||||
for (i = k; i < m; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
*p++ = gf_inv(i ^ j);
|
||||
|
||||
}
|
||||
|
||||
int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
|
||||
{
|
||||
int i, j, k;
|
||||
unsigned char temp;
|
||||
|
||||
// Set out_mat[] to the identity matrix
|
||||
for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
|
||||
out_mat[i] = 0;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
out_mat[i * n + i] = 1;
|
||||
|
||||
// Inverse
|
||||
for (i = 0; i < n; i++) {
|
||||
// Check for 0 in pivot element
|
||||
if (in_mat[i * n + i] == 0) {
|
||||
// Find a row with non-zero in current column and swap
|
||||
for (j = i + 1; j < n; j++)
|
||||
if (in_mat[j * n + i])
|
||||
break;
|
||||
|
||||
if (j == n) // Couldn't find means it's singular
|
||||
return -1;
|
||||
|
||||
for (k = 0; k < n; k++) { // Swap rows i,j
|
||||
temp = in_mat[i * n + k];
|
||||
in_mat[i * n + k] = in_mat[j * n + k];
|
||||
in_mat[j * n + k] = temp;
|
||||
|
||||
temp = out_mat[i * n + k];
|
||||
out_mat[i * n + k] = out_mat[j * n + k];
|
||||
out_mat[j * n + k] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
temp = gf_inv(in_mat[i * n + i]); // 1/pivot
|
||||
for (j = 0; j < n; j++) { // Scale row i by 1/pivot
|
||||
in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
|
||||
out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
if (j == i)
|
||||
continue;
|
||||
|
||||
temp = in_mat[j * n + i];
|
||||
for (k = 0; k < n; k++) {
|
||||
out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
|
||||
in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Calculates const table gftbl in GF(2^8) from single input A
|
||||
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
|
||||
|
||||
void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
|
||||
{
|
||||
unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
|
||||
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
unsigned long long v1, v2, v4, v8, *t;
|
||||
unsigned long long v10, v20, v40, v80;
|
||||
unsigned char c17, c18, c20, c24;
|
||||
|
||||
t = (unsigned long long *)tbl;
|
||||
|
||||
v1 = c * 0x0100010001000100ull;
|
||||
v2 = c2 * 0x0101000001010000ull;
|
||||
v4 = c4 * 0x0101010100000000ull;
|
||||
v8 = c8 * 0x0101010101010101ull;
|
||||
|
||||
v4 = v1 ^ v2 ^ v4;
|
||||
t[0] = v4;
|
||||
t[1] = v8 ^ v4;
|
||||
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
|
||||
v10 = c17 * 0x0100010001000100ull;
|
||||
v20 = c18 * 0x0101000001010000ull;
|
||||
v40 = c20 * 0x0101010100000000ull;
|
||||
v80 = c24 * 0x0101010101010101ull;
|
||||
|
||||
v40 = v10 ^ v20 ^ v40;
|
||||
t[2] = v40;
|
||||
t[3] = v80 ^ v40;
|
||||
|
||||
#else // 32-bit or other
|
||||
unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
|
||||
unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30,
|
||||
c31;
|
||||
|
||||
c3 = c2 ^ c;
|
||||
c5 = c4 ^ c;
|
||||
c6 = c4 ^ c2;
|
||||
c7 = c4 ^ c3;
|
||||
|
||||
c9 = c8 ^ c;
|
||||
c10 = c8 ^ c2;
|
||||
c11 = c8 ^ c3;
|
||||
c12 = c8 ^ c4;
|
||||
c13 = c8 ^ c5;
|
||||
c14 = c8 ^ c6;
|
||||
c15 = c8 ^ c7;
|
||||
|
||||
tbl[0] = 0;
|
||||
tbl[1] = c;
|
||||
tbl[2] = c2;
|
||||
tbl[3] = c3;
|
||||
tbl[4] = c4;
|
||||
tbl[5] = c5;
|
||||
tbl[6] = c6;
|
||||
tbl[7] = c7;
|
||||
tbl[8] = c8;
|
||||
tbl[9] = c9;
|
||||
tbl[10] = c10;
|
||||
tbl[11] = c11;
|
||||
tbl[12] = c12;
|
||||
tbl[13] = c13;
|
||||
tbl[14] = c14;
|
||||
tbl[15] = c15;
|
||||
|
||||
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c19 = c18 ^ c17;
|
||||
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c21 = c20 ^ c17;
|
||||
c22 = c20 ^ c18;
|
||||
c23 = c20 ^ c19;
|
||||
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
|
||||
c25 = c24 ^ c17;
|
||||
c26 = c24 ^ c18;
|
||||
c27 = c24 ^ c19;
|
||||
c28 = c24 ^ c20;
|
||||
c29 = c24 ^ c21;
|
||||
c30 = c24 ^ c22;
|
||||
c31 = c24 ^ c23;
|
||||
|
||||
tbl[16] = 0;
|
||||
tbl[17] = c17;
|
||||
tbl[18] = c18;
|
||||
tbl[19] = c19;
|
||||
tbl[20] = c20;
|
||||
tbl[21] = c21;
|
||||
tbl[22] = c22;
|
||||
tbl[23] = c23;
|
||||
tbl[24] = c24;
|
||||
tbl[25] = c25;
|
||||
tbl[26] = c26;
|
||||
tbl[27] = c27;
|
||||
tbl[28] = c28;
|
||||
tbl[29] = c29;
|
||||
tbl[30] = c30;
|
||||
tbl[31] = c31;
|
||||
|
||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
}
|
||||
|
||||
// Baseline GF(2^8) dot product across vlen source vectors.
// v holds one 32-byte expanded table per coefficient; element
// [vec*32 + 1] of each table is the original coefficient itself.
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
			   unsigned char **src, unsigned char *dest)
{
	int pos, vec;

	for (pos = 0; pos < len; pos++) {
		unsigned char acc = 0;

		for (vec = 0; vec < vlen; vec++)
			acc ^= gf_mul(src[vec][pos], v[vec * 32 + 1]);

		dest[pos] = acc;
	}
}
|
||||
|
||||
// Baseline multiply-accumulate: for each byte of src, multiply by the
// coefficient stored at v[vec_i*32 + 1] and XOR the product into dest.
void gf_vect_mad_base(int len, int vec, int vec_i,
		      unsigned char *v, unsigned char *src, unsigned char *dest)
{
	int pos;

	for (pos = 0; pos < len; pos++)
		dest[pos] ^= gf_mul(src[pos], v[vec_i * 32 + 1]);
}
|
||||
|
||||
// Baseline encode: produce `dests` output vectors, each the GF(2^8) dot
// product of the `srcs` inputs against one row of coefficients.  The
// coefficient for (input `in`, output `out`) lives at
// v[in*32 + out*srcs*32 + 1] in the expanded-table layout.
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
			 unsigned char **src, unsigned char **dest)
{
	int out, pos, in;

	for (out = 0; out < dests; out++) {
		for (pos = 0; pos < len; pos++) {
			unsigned char acc = 0;

			for (in = 0; in < srcs; in++)
				acc ^= gf_mul(src[in][pos], v[in * 32 + out * srcs * 32 + 1]);

			dest[out][pos] = acc;
		}
	}
}
|
||||
|
||||
// Baseline incremental encode: fold a single source vector `data`
// (input index vec_i of k total) into each of the `rows` parity
// buffers, using the coefficient at v[vec_i*32 + row*k*32 + 1].
void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
				unsigned char *data, unsigned char **dest)
{
	int row, pos;

	for (row = 0; row < rows; row++) {
		for (pos = 0; pos < len; pos++)
			dest[row][pos] ^= gf_mul(data[pos], v[vec_i * 32 + row * k * 32 + 1]);
	}
}
|
||||
|
||||
// Baseline constant multiply: dest[i] = c * src[i] in GF(2^8).
// The 2nd element of the expanded table `a` is the reference value
// (the original constant) that the rest of the table was built from.
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
	unsigned char coef = a[1];
	int i;

	for (i = 0; i < len; i++)
		dest[i] = gf_mul(coef, src[i]);
}
|
||||
|
||||
struct slver {
|
||||
UINT16 snum;
|
||||
UINT8 ver;
|
||||
UINT8 core;
|
||||
};
|
||||
|
||||
// Version info
|
||||
struct slver gf_vect_mul_init_slver_00020035;
|
||||
struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
|
||||
|
||||
struct slver ec_encode_data_base_slver_00010135;
|
||||
struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
|
||||
|
||||
struct slver gf_vect_mul_base_slver_00010136;
|
||||
struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
|
||||
|
||||
struct slver gf_vect_dot_prod_base_slver_00010137;
|
||||
struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
|
File diff suppressed because it is too large
Load Diff
|
@ -1,933 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef _ERASURE_CODE_H_
|
||||
#define _ERASURE_CODE_H_
|
||||
|
||||
/**
|
||||
* @file erasure_code.h
|
||||
* @brief Interface to functions supporting erasure code encode and decode.
|
||||
*
|
||||
* This file defines the interface to optimized functions used in erasure
|
||||
* codes. Encode and decode of erasures in GF(2^8) are made by calculating the
|
||||
* dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
|
||||
* set of coefficients. Values for the coefficients are determined by the type
|
||||
* of erasure code. Using a general dot product means that any sequence of
|
||||
* coefficients may be used including erasure codes based on random
|
||||
* coefficients.
|
||||
* Multiple versions of dot product are supplied to calculate 1-6 output
|
||||
* vectors in one pass.
|
||||
* Base GF multiply and divide functions can be sped up by defining
|
||||
* GF_LARGE_TABLES at the expense of memory size.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gf_vect_mul.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Initialize tables for fast Erasure Code encode and decode.
|
||||
*
|
||||
* Generates the expanded tables needed for fast encode or decode for erasure
|
||||
* codes on blocks of data. 32bytes is generated for each input coefficient.
|
||||
*
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param a Pointer to sets of arrays of input coefficients used to encode
|
||||
* or decode data.
|
||||
* @param gftbls Pointer to start of space for concatenated output tables
|
||||
* generated from input coefficients. Must be of size 32*k*rows.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
|
||||
*
|
||||
* Given a list of source data blocks, generate one or multiple blocks of
|
||||
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||
* suitable set of coefficients, this function will perform the fast generation
|
||||
* or decoding of Reed-Solomon type erasure codes.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each block of data (vector) of source or dest data.
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||
* @param data Array of pointers to source input buffers.
|
||||
* @param coding Array of pointers to coded output buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data.
|
||||
*
|
||||
* Arch specific version of ec_encode_data() with same parameters.
|
||||
* @requires SSE4.1
|
||||
*/
|
||||
void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data.
|
||||
*
|
||||
* Arch specific version of ec_encode_data() with same parameters.
|
||||
* @requires AVX
|
||||
*/
|
||||
void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data.
|
||||
*
|
||||
* Arch specific version of ec_encode_data() with same parameters.
|
||||
* @requires AVX2
|
||||
*/
|
||||
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
|
||||
unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data, runs baseline version.
|
||||
*
|
||||
* Baseline version of ec_encode_data() with same parameters.
|
||||
*/
|
||||
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
|
||||
*
|
||||
* Given one source data block, update one or multiple blocks of encoded data as
|
||||
* specified by a matrix of GF(2^8) coefficients. When given a suitable set of
|
||||
* coefficients, this function will perform the fast generation or decoding of
|
||||
* Reed-Solomon type erasure codes from one input source at a time.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and selects the
|
||||
* appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each block of data (vector) of source or dest data.
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param g_tbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||
* @param data Pointer to single input source used to update output parity.
|
||||
* @param coding Array of pointers to coded output buffers.
|
||||
* @returns none
|
||||
*/
|
||||
void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate update for encode or decode of erasure codes from single source.
|
||||
*
|
||||
* Arch specific version of ec_encode_data_update() with same parameters.
|
||||
* @requires SSE4.1
|
||||
*/
|
||||
|
||||
void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate update for encode or decode of erasure codes from single source.
|
||||
*
|
||||
* Arch specific version of ec_encode_data_update() with same parameters.
|
||||
* @requires AVX
|
||||
*/
|
||||
|
||||
void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate update for encode or decode of erasure codes from single source.
|
||||
*
|
||||
* Arch specific version of ec_encode_data_update() with same parameters.
|
||||
* @requires AVX2
|
||||
*/
|
||||
|
||||
void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding);
|
||||
|
||||
/**
|
||||
* @brief Generate update for encode or decode of erasure codes from single source.
|
||||
*
|
||||
* Baseline version of ec_encode_data_update().
|
||||
*/
|
||||
|
||||
void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
|
||||
unsigned char *data, unsigned char **dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
 * @param len    Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
 * @param len    Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
 * @param len    Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
 * Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
* @requires AVX2
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product, runs baseline version.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients. Only elements 32*CONST*j + 1
|
||||
* of this array are used, where j = (0, 1, 2...) and CONST is the
|
||||
* number of elements in the array of input coefficients. The
|
||||
* elements used correspond to the original input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product, runs appropriate version.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate, runs appropriate version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constant and add to destination array. Can be used for erasure coding encode
|
||||
* and decode update when only one source is available at a time. Function
|
||||
* requires pre-calculation of a 32*vec byte constant array based on the input
|
||||
* coefficients.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and selects the
|
||||
* appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate, arch specific version.
|
||||
*
|
||||
* Arch specific version of gf_vect_mad() with same parameters.
|
||||
* @requires SSE4.1
|
||||
*/
|
||||
|
||||
void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate, arch specific version.
|
||||
*
|
||||
* Arch specific version of gf_vect_mad() with same parameters.
|
||||
* @requires AVX
|
||||
*/
|
||||
|
||||
void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate, arch specific version.
|
||||
*
|
||||
* Arch specific version of gf_vect_mad() with same parameters.
|
||||
* @requires AVX2
|
||||
*/
|
||||
|
||||
void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply accumulate, baseline version.
|
||||
*
|
||||
* Baseline version of gf_vect_mad() with same parameters.
|
||||
*/
|
||||
|
||||
void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
|
||||
* @requires AVX
|
||||
*/
|
||||
void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
|
||||
* @requires AVX2
|
||||
*/
|
||||
void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
|
||||
* @requires AVX
|
||||
*/
|
||||
void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
|
||||
* @requires AVX2
|
||||
*/
|
||||
void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
|
||||
*
|
||||
* Does a GF(2^8) multiply across each byte of input source with expanded
|
||||
* constants and add to destination arrays. Can be used for erasure coding
|
||||
* encode and decode update when only one source is available at a
|
||||
* time. Function requires pre-calculation of a 32*vec byte constant array based
|
||||
* on the input coefficients.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vec The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param vec_i The vector index corresponding to the single input source.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*vec.
|
||||
* @param src Pointer to source input array.
|
||||
* @param dest Array of pointers to destination input/outputs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
|
||||
* @requires AVX
|
||||
*/
|
||||
void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
|
||||
* @requires AVX2
|
||||
*/
|
||||
void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
|
||||
* @requires SSE4.1
|
||||
*/
|
||||
void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
|
||||
* @requires AVX
|
||||
*/
|
||||
void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
|
||||
* @requires AVX2
|
||||
*/
|
||||
void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
|
||||
* @requires SSE4.1
|
||||
*/
|
||||
void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
|
||||
* @requires AVX
|
||||
*/
|
||||
void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
|
||||
* @requires AVX2
|
||||
*/
|
||||
void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
|
||||
unsigned char **dest);
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* The remaining are lib support functions used in GF(2^8) operations.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Single element GF(2^8) multiply.
|
||||
*
|
||||
* @param a Multiplicand a
|
||||
* @param b Multiplicand b
|
||||
* @returns Product of a and b in GF(2^8)
|
||||
*/
|
||||
|
||||
unsigned char gf_mul(unsigned char a, unsigned char b);
|
||||
|
||||
/**
|
||||
* @brief Single element GF(2^8) inverse.
|
||||
*
|
||||
* @param a Input element
|
||||
* @returns Field element b such that a x b = {1}
|
||||
*/
|
||||
|
||||
unsigned char gf_inv(unsigned char a);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix of coefficients to be used for encoding.
|
||||
*
|
||||
* Vandermonde matrix example of encoding coefficients where high portion of
|
||||
* matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
|
||||
* i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
|
||||
* erasure encoding but does not guarantee invertable for every sub matrix. For
|
||||
* large k it is possible to find cases where the decode matrix chosen from
|
||||
* sources and parity not in erasure are not invertable. Users may want to
|
||||
* adjust for k > 5.
|
||||
*
|
||||
* @param a [mxk] array to hold coefficients
|
||||
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||
* @param k number of columns in matrix corresponding to srcs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_gen_rs_matrix(unsigned char *a, int m, int k);
|
||||
|
||||
/**
|
||||
* @brief Generate a Cauchy matrix of coefficients to be used for encoding.
|
||||
*
|
||||
* Cauchy matrix example of encoding coefficients where high portion of matrix
|
||||
* is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
|
||||
* i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable.
|
||||
*
|
||||
* @param a [mxk] array to hold coefficients
|
||||
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||
* @param k number of columns in matrix corresponding to srcs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
|
||||
|
||||
/**
|
||||
* @brief Invert a matrix in GF(2^8)
|
||||
*
|
||||
* @param in input matrix
|
||||
* @param out output matrix such that [in] x [out] = [I] - identity matrix
|
||||
* @param n size of matrix [nxn]
|
||||
* @returns 0 successful, other fail on singular input matrix
|
||||
*/
|
||||
|
||||
int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
|
||||
|
||||
|
||||
/*************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ERASURE_CODE_H_
|
267
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/ec_highlevel_func.c
generated
vendored
267
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/ec_highlevel_func.c
generated
vendored
|
@ -1,267 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#include <limits.h>
|
||||
#include "ec_code.h"
|
||||
#include "ec_types.h"
|
||||
|
||||
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < rows; i++) {
|
||||
for (j = 0; j < k; j++) {
|
||||
gf_vect_mul_init(*a++, g_tbls);
|
||||
g_tbls += 32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 4) {
|
||||
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
g_tbls += 4 * k * 32;
|
||||
coding += 4;
|
||||
rows -= 4;
|
||||
}
|
||||
switch (rows) {
|
||||
case 3:
|
||||
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 4) {
|
||||
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
g_tbls += 4 * k * 32;
|
||||
coding += 4;
|
||||
rows -= 4;
|
||||
}
|
||||
switch (rows) {
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
|
||||
unsigned char **coding)
|
||||
{
|
||||
|
||||
if (len < 32) {
|
||||
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows >= 4) {
|
||||
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
g_tbls += 4 * k * 32;
|
||||
coding += 4;
|
||||
rows -= 4;
|
||||
}
|
||||
switch (rows) {
|
||||
case 3:
|
||||
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
|
||||
void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 16) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
|
||||
unsigned char *data, unsigned char **coding)
|
||||
{
|
||||
if (len < 32) {
|
||||
ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
|
||||
return;
|
||||
}
|
||||
while (rows > 6) {
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
g_tbls += 6 * k * 32;
|
||||
coding += 6;
|
||||
rows -= 6;
|
||||
}
|
||||
switch (rows) {
|
||||
case 6:
|
||||
gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 5:
|
||||
gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 4:
|
||||
gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 3:
|
||||
gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 2:
|
||||
gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding);
|
||||
break;
|
||||
case 1:
|
||||
gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding);
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
|
||||
struct slver {
|
||||
UINT16 snum;
|
||||
UINT8 ver;
|
||||
UINT8 core;
|
||||
};
|
||||
|
||||
// Version info
|
||||
struct slver ec_init_tables_slver_00010068;
|
||||
struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };
|
||||
|
||||
struct slver ec_encode_data_sse_slver_00020069;
|
||||
struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };
|
|
@ -1,41 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef _ISAL_H_
|
||||
#define _ISAL_H_
|
||||
|
||||
#define ISAL_MAJOR_VERSION 2
|
||||
#define ISAL_MINOR_VERSION 13
|
||||
#define ISAL_PATCH_VERSION 0
|
||||
#define ISAL_MAKE_VERSION(maj, min, patch) ((maj) * 0x10000 + (min) * 0x100 + (patch))
|
||||
#define ISAL_VERSION ISAL_MAKE_VERSION(ISAL_MAJOR_VERSION, ISAL_MINOR_VERSION, ISAL_PATCH_VERSION)
|
||||
|
||||
#include "ec_code.h"
|
||||
#include "gf_vect_mul.h"
|
||||
#endif //_ISAL_H_
|
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
* Mini Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __COMMON_H__
|
||||
#define __COMMON_H__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
int32_t minio_init_encoder (int technique, int k, int m,
|
||||
unsigned char **encode_matrix,
|
||||
unsigned char **encode_tbls);
|
||||
|
||||
int32_t minio_init_decoder (int32_t *error_index,
|
||||
int k, int n, int errs,
|
||||
unsigned char *encoding_matrix,
|
||||
unsigned char **decode_matrix,
|
||||
unsigned char **decode_tbls,
|
||||
uint32_t **decode_index);
|
||||
|
||||
int32_t minio_get_source_target (int errs, int k, int m,
|
||||
int32_t *error_index,
|
||||
uint32_t *decode_index,
|
||||
unsigned char **buffs,
|
||||
unsigned char ***source,
|
||||
unsigned char ***target);
|
||||
#endif /* __COMMON_H__ */
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ec_isal-l.h"
|
||||
#include "ec_minio_common.h"
|
||||
|
||||
static
|
||||
int32_t _minio_src_index_in_error (int r, int32_t *error_index)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; error_index[i] != -1; i++) {
|
||||
if (error_index[i] == r) {
|
||||
// true
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
// false
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Separate out source data and target buffers
|
||||
int32_t minio_get_source_target (int errs, int k, int m,
|
||||
int32_t *error_index,
|
||||
uint32_t *decode_index,
|
||||
unsigned char **buffs,
|
||||
unsigned char ***source,
|
||||
unsigned char ***target)
|
||||
{
|
||||
int i;
|
||||
unsigned char *tmp_source[k];
|
||||
unsigned char *tmp_target[m];
|
||||
|
||||
if (k < 0 || m < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset (tmp_source, 0, k);
|
||||
memset (tmp_target, 0, m);
|
||||
|
||||
for (i = 0; i < k; i++) {
|
||||
tmp_source[i] = (unsigned char *) buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
for (i = 0; i < m; i++) {
|
||||
if (i < errs)
|
||||
tmp_target[i] = (unsigned char *) buffs[error_index[i]];
|
||||
}
|
||||
|
||||
*source = tmp_source;
|
||||
*target = tmp_target;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Generate decode matrix during the decoding phase
|
||||
*/
|
||||
|
||||
int minio_init_decoder (int32_t *error_index,
|
||||
int k, int n, int errs,
|
||||
unsigned char *encode_matrix,
|
||||
unsigned char **decode_matrix,
|
||||
unsigned char **decode_tbls,
|
||||
uint32_t **decode_index)
|
||||
{
|
||||
int i, j, r, s, l, z;
|
||||
unsigned char input_matrix[k * n];
|
||||
unsigned char inverse_matrix[k * n];
|
||||
unsigned char tmp_decode_matrix[k * n];
|
||||
unsigned char tmp_decode_tbls[k * n * 32];
|
||||
uint32_t tmp_decode_index[k];
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (_minio_src_index_in_error(r, error_index))
|
||||
r++;
|
||||
for (j = 0; j < k; j++) {
|
||||
input_matrix[k * i + j] = encode_matrix[k * r + j];
|
||||
}
|
||||
tmp_decode_index[i] = r;
|
||||
}
|
||||
|
||||
// Not all vandermonde matrix can be inverted
|
||||
if (gf_invert_matrix(input_matrix, inverse_matrix, k) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (l = 0; l < errs; l++) {
|
||||
if (error_index[l] < k) {
|
||||
// decoding matrix elements for data chunks
|
||||
for (j = 0; j < k; j++) {
|
||||
tmp_decode_matrix[k * l + j] =
|
||||
inverse_matrix[k *
|
||||
error_index[l] + j];
|
||||
}
|
||||
} else {
|
||||
int s = 0;
|
||||
// decoding matrix element for coding chunks
|
||||
for (i = 0; i < k; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < k; j++) {
|
||||
s ^= gf_mul(inverse_matrix[j * k + i],
|
||||
encode_matrix[k *
|
||||
error_index[l] + j]);
|
||||
}
|
||||
tmp_decode_matrix[k * l + i] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ec_init_tables (k, errs, tmp_decode_matrix, tmp_decode_tbls);
|
||||
|
||||
*decode_matrix = tmp_decode_matrix;
|
||||
*decode_tbls = tmp_decode_tbls;
|
||||
*decode_index = tmp_decode_index;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ec_isal-l.h"
|
||||
#include "ec_minio_common.h"
|
||||
|
||||
/*
|
||||
Generate encode matrix during the encoding phase
|
||||
*/
|
||||
|
||||
int32_t minio_init_encoder (int technique, int k, int m,
|
||||
unsigned char **encode_matrix,
|
||||
unsigned char **encode_tbls)
|
||||
{
|
||||
size_t encode_matrix_size;
|
||||
size_t encode_tbls_size;
|
||||
unsigned char *tmp_matrix;
|
||||
unsigned char *tmp_tbls;
|
||||
|
||||
tmp_matrix = (unsigned char *) malloc (k * (k + m));
|
||||
tmp_tbls = (unsigned char *) malloc (k * (k + m) * 32);
|
||||
|
||||
if (technique == 0) {
|
||||
/*
|
||||
Commonly used method for choosing coefficients in erasure
|
||||
encoding but does not guarantee invertable for every sub
|
||||
matrix. For large k it is possible to find cases where the
|
||||
decode matrix chosen from sources and parity not in erasure
|
||||
are not invertable. Users may want to adjust for k > 5.
|
||||
-- Intel
|
||||
*/
|
||||
gf_gen_rs_matrix (tmp_matrix, k + m, k);
|
||||
} else if (technique == 1) {
|
||||
gf_gen_cauchy1_matrix (tmp_matrix, k + m, k);
|
||||
}
|
||||
|
||||
ec_init_tables(k, m, &tmp_matrix[k * k], tmp_tbls);
|
||||
|
||||
*encode_matrix = tmp_matrix;
|
||||
*encode_tbls = tmp_tbls;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,462 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Select PLT indirection per output format: ELF shared objects must route
; cross-module calls through the PLT.
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT                 wrt ..plt
%else
%define WRT_OPT
%endif

; Mach-O prefixes C-visible symbols with an underscore; map each internal
; name to its platform-decorated form once, here, so the rest of the file
; is format-agnostic.
%ifidn __OUTPUT_FORMAT__, macho64
%define EC_ENCODE_DATA_SSE      _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX      _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2     _ec_encode_data_avx2
%define EC_ENCODE_DATA_BASE     _ec_encode_data_base
%define EC_ENCODE_DATA          _ec_encode_data

%define EC_ENCODE_DATA_UPDATE_BASE      _ec_encode_data_update_base
%define EC_ENCODE_DATA_UPDATE_SSE       _ec_encode_data_update_sse
%define EC_ENCODE_DATA_UPDATE_AVX       _ec_encode_data_update_avx
%define EC_ENCODE_DATA_UPDATE_AVX2      _ec_encode_data_update_avx2

%define GF_VECT_MAD_BASE        _gf_vect_mad_base
%define GF_VECT_MAD_SSE         _gf_vect_mad_sse
%define GF_VECT_MAD_AVX         _gf_vect_mad_avx
%define GF_VECT_MAD_AVX2        _gf_vect_mad_avx2

%define GF_VECT_MUL_SSE         _gf_vect_mul_sse
%define GF_VECT_MUL_AVX         _gf_vect_mul_avx
%define GF_VECT_MUL_BASE        _gf_vect_mul_base
%define GF_VECT_MUL             _gf_vect_mul

%define GF_VECT_DOT_PROD_SSE    _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX    _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2   _gf_vect_dot_prod_avx2
%define GF_VECT_DOT_PROD_BASE   _gf_vect_dot_prod_base
%define GF_VECT_DOT_PROD        _gf_vect_dot_prod

%else
%define EC_ENCODE_DATA_SSE      ec_encode_data_sse
%define EC_ENCODE_DATA_AVX      ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2     ec_encode_data_avx2
%define EC_ENCODE_DATA_BASE     ec_encode_data_base
%define EC_ENCODE_DATA          ec_encode_data

%define EC_ENCODE_DATA_UPDATE_BASE      ec_encode_data_update_base
%define EC_ENCODE_DATA_UPDATE_SSE       ec_encode_data_update_sse
%define EC_ENCODE_DATA_UPDATE_AVX       ec_encode_data_update_avx
%define EC_ENCODE_DATA_UPDATE_AVX2      ec_encode_data_update_avx2

%define GF_VECT_MAD_BASE        gf_vect_mad_base
%define GF_VECT_MAD_SSE         gf_vect_mad_sse
%define GF_VECT_MAD_AVX         gf_vect_mad_avx
%define GF_VECT_MAD_AVX2        gf_vect_mad_avx2

%define GF_VECT_MUL_SSE         gf_vect_mul_sse
%define GF_VECT_MUL_AVX         gf_vect_mul_avx
%define GF_VECT_MUL_BASE        gf_vect_mul_base
%define GF_VECT_MUL             gf_vect_mul

%define GF_VECT_DOT_PROD_SSE    gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX    gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2   gf_vect_dot_prod_avx2
%define GF_VECT_DOT_PROD_BASE   gf_vect_dot_prod_base
%define GF_VECT_DOT_PROD        gf_vect_dot_prod

%endif

%include "ec_reg_sizes.asm"

; Word-size and scratch-register abstraction: the dispatchers below are
; written in terms of def_wrd/wrd_sz/arg1..arg5 so the same source
; assembles for both 32- and 64-bit targets.
%ifidn __OUTPUT_FORMAT__, elf32

[bits 32]

%define def_wrd         dd
%define wrd_sz          dword
%define arg1            esi
%define arg2            eax
%define arg3            ebx
%define arg4            ecx
%define arg5            edx

%else

default rel
[bits 64]

%define def_wrd         dq
%define wrd_sz          qword
%define arg1            rsi
%define arg2            rax
%define arg3            rbx
%define arg4            rcx
%define arg5            rdx


; SIMD variants exist only for 64-bit builds; 32-bit falls back to the
; *_base implementations declared below.
extern EC_ENCODE_DATA_UPDATE_SSE
extern EC_ENCODE_DATA_UPDATE_AVX
extern EC_ENCODE_DATA_UPDATE_AVX2
extern GF_VECT_MUL_SSE
extern GF_VECT_MUL_AVX

extern GF_VECT_MAD_SSE
extern GF_VECT_MAD_AVX
extern GF_VECT_MAD_AVX2
%endif

extern GF_VECT_MUL_BASE
extern EC_ENCODE_DATA_BASE
extern EC_ENCODE_DATA_UPDATE_BASE
extern GF_VECT_DOT_PROD_BASE
extern GF_VECT_MAD_BASE

extern GF_VECT_DOT_PROD_SSE
extern GF_VECT_DOT_PROD_AVX
extern GF_VECT_DOT_PROD_AVX2
extern EC_ENCODE_DATA_SSE
extern EC_ENCODE_DATA_AVX
extern EC_ENCODE_DATA_AVX2
|
||||
|
||||
|
||||
section .data
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
;;; Therefore, *_dispatch_init is only executed on first call.
; Each *_dispatched cell is a function pointer, statically initialized to
; the matching *_mbinit stub. The stub runs CPUID-based selection, patches
; the cell with the best implementation, and every later call jumps
; straight to that implementation.

ec_encode_data_dispatched:
	def_wrd		ec_encode_data_mbinit

gf_vect_mul_dispatched:
	def_wrd		gf_vect_mul_mbinit

gf_vect_dot_prod_dispatched:
	def_wrd		gf_vect_dot_prod_mbinit

ec_encode_data_update_dispatched:
	def_wrd		ec_encode_data_update_mbinit

gf_vect_mad_dispatched:
	def_wrd		gf_vect_mad_mbinit
|
||||
|
||||
section .text
;;;;
; ec_encode_data multibinary function
;;;;
; First call lands on ec_encode_data_mbinit, which runs the dispatcher and
; then falls through into the indirect jump (now aimed at the selected
; implementation). All registers are preserved so the stub is transparent
; to the caller.
global EC_ENCODE_DATA:function
ec_encode_data_mbinit:
	call	ec_encode_data_dispatch_init

EC_ENCODE_DATA:
	jmp	wrd_sz [ec_encode_data_dispatched]

ec_encode_data_dispatch_init:
	push	arg1
	push	arg2
	push	arg3
	push	arg4
	push	arg5
	lea	arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default

	; CPUID leaf 1: feature bits in ecx.
	mov	eax, 1
	cpuid
	lea	arg3, [EC_ENCODE_DATA_SSE WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, arg3

	; AVX requires both the AVX feature bit and OS support (OSXSAVE).
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	lea	arg3, [EC_ENCODE_DATA_AVX WRT_OPT]

	jne	_done_ec_encode_data_init
	mov	arg1, arg3

	;; Try for AVX2 (CPUID leaf 7, sub-leaf 0, bit in ebx)
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	arg3, [EC_ENCODE_DATA_AVX2 WRT_OPT]
	cmovne	arg1, arg3
	;; Does it have xmm and ymm support (XCR0 via xgetbv)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_ec_encode_data_init
	; NOTE(review): if the OS has not enabled XMM/YMM state this drops to
	; the SSE variant even though the SSE4.1 test above may not have
	; passed — confirm this fallback is intended.
	lea	arg1, [EC_ENCODE_DATA_SSE WRT_OPT]

_done_ec_encode_data_init:
	pop	arg5
	pop	arg4
	pop	arg3
	pop	arg2
	mov	[ec_encode_data_dispatched], arg1
	pop	arg1
	ret
|
||||
|
||||
;;;;
; gf_vect_mul multibinary function
;;;;
; Same self-patching dispatch pattern as ec_encode_data: the first call
; selects an implementation and caches it in gf_vect_mul_dispatched.
global GF_VECT_MUL:function
gf_vect_mul_mbinit:
	call	gf_vect_mul_dispatch_init

GF_VECT_MUL:
	jmp	wrd_sz [gf_vect_mul_dispatched]

gf_vect_mul_dispatch_init:
	push	arg1
%ifidn __OUTPUT_FORMAT__, elf32		;; 32-bit check
	; 32-bit builds only ship the base implementation.
	lea	arg1, [GF_VECT_MUL_BASE]
%else
	push	rax
	push	rbx
	push	rcx
	push	rdx
	lea	arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default

	mov	eax, 1
	cpuid
	; NOTE(review): this dispatcher gates the SSE variant on SSE4.2,
	; unlike the others which test SSE4.1 — confirm against the
	; instruction set actually used by gf_vect_mul_sse.
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	lea	rbx, [GF_VECT_MUL_SSE WRT_OPT]
	je	_done_gf_vect_mul_dispatch_init
	mov	arg1, rbx

	;; Try for AVX (needs the feature bit plus OSXSAVE)
	and	ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
	cmp	ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
	jne	_done_gf_vect_mul_dispatch_init

	;; Does it have xmm and ymm support (XCR0 via xgetbv)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_done_gf_vect_mul_dispatch_init
	lea	arg1, [GF_VECT_MUL_AVX WRT_OPT]

_done_gf_vect_mul_dispatch_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif		;; END 32-bit check
	mov	[gf_vect_mul_dispatched], arg1
	pop	arg1
	ret
|
||||
|
||||
;;;;
; ec_encode_data_update multibinary function
;;;;
; Self-patching dispatch: the first call selects an implementation and
; caches it in ec_encode_data_update_dispatched.
global EC_ENCODE_DATA_UPDATE:function
ec_encode_data_update_mbinit:
	call	ec_encode_data_update_dispatch_init

EC_ENCODE_DATA_UPDATE:
	jmp	wrd_sz [ec_encode_data_update_dispatched]

ec_encode_data_update_dispatch_init:
	push	arg1
%ifidn __OUTPUT_FORMAT__, elf32		;; 32-bit check
	; 32-bit builds only ship the base implementation.
	lea	arg1, [ec_encode_data_update_base]
%else
	push	rax
	push	rbx
	push	rcx
	push	rdx
	lea	arg1, [EC_ENCODE_DATA_UPDATE_BASE WRT_OPT] ; Default

	mov	eax, 1
	cpuid
	lea	rbx, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, rbx

	; AVX requires both the feature bit and OS support (OSXSAVE).
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	lea	rbx, [EC_ENCODE_DATA_UPDATE_AVX WRT_OPT]

	jne	_done_ec_encode_data_update_init
	; rsi is the same register as arg1 in the 64-bit mapping.
	mov	rsi, rbx

	;; Try for AVX2 (CPUID leaf 7, sub-leaf 0)
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	rbx, [EC_ENCODE_DATA_UPDATE_AVX2 WRT_OPT]
	cmovne	rsi, rbx

	;; Does it have xmm and ymm support (XCR0 via xgetbv)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_ec_encode_data_update_init
	lea	rsi, [EC_ENCODE_DATA_UPDATE_SSE WRT_OPT]

_done_ec_encode_data_update_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif		;; END 32-bit check
	mov	[ec_encode_data_update_dispatched], arg1
	pop	arg1
	ret
|
||||
|
||||
;;;;
; gf_vect_dot_prod multibinary function
;;;;
; Self-patching dispatch: the first call selects an implementation and
; caches it in gf_vect_dot_prod_dispatched.
global GF_VECT_DOT_PROD:function
gf_vect_dot_prod_mbinit:
	call	gf_vect_dot_prod_dispatch_init

GF_VECT_DOT_PROD:
	jmp	wrd_sz [gf_vect_dot_prod_dispatched]

gf_vect_dot_prod_dispatch_init:
	push	arg1
	push	arg2
	push	arg3
	push	arg4
	push	arg5
	lea	arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default

	mov	eax, 1
	cpuid
	lea	arg3, [GF_VECT_DOT_PROD_SSE WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, arg3

	; AVX requires both the feature bit and OS support (OSXSAVE).
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	lea	arg3, [GF_VECT_DOT_PROD_AVX WRT_OPT]

	jne	_done_gf_vect_dot_prod_init
	mov	arg1, arg3

	;; Try for AVX2 (CPUID leaf 7, sub-leaf 0)
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	arg3, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
	cmovne	arg1, arg3
	;; Does it have xmm and ymm support (XCR0 via xgetbv)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_gf_vect_dot_prod_init
	lea	arg1, [GF_VECT_DOT_PROD_SSE WRT_OPT]

_done_gf_vect_dot_prod_init:
	pop	arg5
	pop	arg4
	pop	arg3
	pop	arg2
	mov	[gf_vect_dot_prod_dispatched], arg1
	pop	arg1
	ret
|
||||
|
||||
;;;;
; gf_vect_mad multibinary function
;;;;
; Self-patching dispatch: the first call selects an implementation and
; caches it in gf_vect_mad_dispatched.
global GF_VECT_MAD:function
gf_vect_mad_mbinit:
	call	gf_vect_mad_dispatch_init

GF_VECT_MAD:
	jmp	wrd_sz [gf_vect_mad_dispatched]

gf_vect_mad_dispatch_init:
	push	arg1
%ifidn __OUTPUT_FORMAT__, elf32		;; 32-bit check
	; 32-bit builds only ship the base implementation.
	lea	arg1, [gf_vect_mad_base]
%else
	push	rax
	push	rbx
	push	rcx
	push	rdx
	lea	arg1, [GF_VECT_MAD_BASE WRT_OPT] ; Default

	mov	eax, 1
	cpuid
	lea	rbx, [GF_VECT_MAD_SSE WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, rbx

	; AVX requires both the feature bit and OS support (OSXSAVE).
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	lea	rbx, [GF_VECT_MAD_AVX WRT_OPT]

	jne	_done_gf_vect_mad_init
	; rsi is the same register as arg1 in the 64-bit mapping.
	mov	rsi, rbx

	;; Try for AVX2 (CPUID leaf 7, sub-leaf 0)
	xor	ecx, ecx
	mov	eax, 7
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	rbx, [GF_VECT_MAD_AVX2 WRT_OPT]
	cmovne	rsi, rbx

	;; Does it have xmm and ymm support (XCR0 via xgetbv)
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_gf_vect_mad_init
	lea	rsi, [GF_VECT_MAD_SSE WRT_OPT]

_done_gf_vect_mad_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif		;; END 32-bit check
	mov	[gf_vect_mad_dispatched], arg1
	pop	arg1
	ret
|
||||
|
||||
; slversion embeds a version stamp for a function as data symbols:
; %1 = function name, %2 = core byte, %3 = version byte, %4 = serial word.
; Emits "dw 0x%4" followed by "db 0x%3, 0x%2" at two global labels.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro

;;;       func                  core, ver, snum
slversion EC_ENCODE_DATA, 00, 03, 0133
slversion GF_VECT_MUL, 00, 02, 0134
slversion EC_ENCODE_DATA_UPDATE, 00, 02, 0212
slversion GF_VECT_DOT_PROD, 00, 02, 0138
slversion GF_VECT_MAD, 00, 01, 0213
|
|
@ -1,96 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; CPUID / XGETBV feature-bit masks used by the multibinary dispatchers.
%define EFLAGS_HAS_CPUID        (1<<21)
%define FLAG_CPUID1_ECX_CLMUL   (1<<1)
%define FLAG_CPUID1_EDX_SSE2    (1<<26)
%define FLAG_CPUID1_ECX_SSE3	(1)
%define FLAG_CPUID1_ECX_SSE4_1  (1<<19)
%define FLAG_CPUID1_ECX_SSE4_2  (1<<20)
%define FLAG_CPUID1_ECX_POPCNT  (1<<23)
%define FLAG_CPUID1_ECX_AESNI   (1<<25)
%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
%define FLAG_CPUID1_ECX_AVX     (1<<28)
%define FLAG_CPUID1_EBX_AVX2    (1<<5)
; XCR0 bits 1 and 2: OS has enabled XMM and YMM state saving.
%define FLAG_XGETBV_EAX_XMM_YMM 0x6

; CPUID leaf 1 EAX signature for the Avoton (Atom C2000) family.
%define FLAG_CPUID1_EAX_AVOTON  0x000406d0

; define d and w variants for registers
; (e.g. DWORD(rax) expands to raxd -> eax)

%define	raxd	eax
%define raxw	ax
%define raxb	al

%define	rbxd	ebx
%define rbxw	bx
%define rbxb	bl

%define	rcxd	ecx
%define rcxw	cx
%define rcxb	cl

%define	rdxd	edx
%define rdxw	dx
%define rdxb	dl

%define	rsid	esi
%define rsiw	si
%define rsib	sil

%define	rdid	edi
%define rdiw	di
%define rdib	dil

%define	rbpd	ebp
%define rbpw	bp
%define rbpb	bpl

; XWORD(ymmN) expands to ymmNx -> xmmN (the low 128 bits of ymmN).
%define ymm0x	xmm0
%define ymm1x	xmm1
%define ymm2x	xmm2
%define ymm3x	xmm3
%define ymm4x	xmm4
%define ymm5x	xmm5
%define ymm6x	xmm6
%define ymm7x	xmm7
%define ymm8x	xmm8
%define ymm9x	xmm9
%define ymm10x	xmm10
%define ymm11x	xmm11
%define ymm12x	xmm12
%define ymm13x	xmm13
%define ymm14x	xmm14
%define ymm15x	xmm15

; Token-pasting helpers that select the sized alias defined above.
%define DWORD(reg) reg %+ d
%define WORD(reg)  reg %+ w
%define BYTE(reg)  reg %+ b

%define XWORD(reg) reg %+ x
|
|
@ -1,80 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* @file types.h
|
||||
* @brief Defines standard width types.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __TYPES_H
#define __TYPES_H

#ifdef __cplusplus
extern "C" {
#endif

/* Fixed-width unsigned/signed integer aliases used throughout ISA-L.
   The first branch covers Windows toolchains (MSVC/MinGW __intNN types),
   the second covers Unix and macOS. */
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
typedef unsigned __int64 UINT64;
typedef __int64 INT64;
typedef unsigned __int32 UINT32;
typedef unsigned __int16 UINT16;
typedef unsigned char UINT8;
#else
/* NOTE(review): this assumes an LP64 ABI — 'unsigned long int' is only
   32 bits on 32-bit Unix targets; confirm builds never target those. */
typedef unsigned long int UINT64;
typedef long int INT64;
typedef unsigned int UINT32;
typedef unsigned short int UINT16;
typedef unsigned char UINT8;
#endif


/* Portability shims: aligned declarations, force-inline, and a
   posix_memalign lookalike for Windows (_aligned_malloc-backed). */
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else
# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
#endif

/* DEBUG_PRINT(x) expects a parenthesized printf argument list; compiles
   to a no-op unless DEBUG is defined. */
#ifdef DEBUG
# define DEBUG_PRINT(x) printf x
#else
# define DEBUG_PRINT(x) do {} while (0)
#endif

#ifdef __cplusplus
}
#endif

#endif //__TYPES_H
|
|
@ -1,122 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
// #cgo CFLAGS: -O0
|
||||
// #include <stdlib.h>
|
||||
// #include "ec_isal-l.h"
|
||||
// #include "ec_minio_common.h"
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Decode decodes erasure coded blocks of data into its original
|
||||
// form. Erasure coded data contains K data blocks and M parity
|
||||
// blocks. Decode can withstand data loss up to any M number of blocks.
|
||||
//
|
||||
// "encodedDataBlocks" is an array of K data blocks and M parity
|
||||
// blocks. Data blocks are position and order dependent. Missing blocks
|
||||
// are set to "nil". There must be at least "K" number of data|parity
|
||||
// blocks.
|
||||
//
|
||||
// "dataLen" is the length of original source data
|
||||
func (e *Erasure) Decode(encodedDataBlocks [][]byte, dataLen int) (decodedData []byte, err error) {
|
||||
var source, target **C.uchar
|
||||
|
||||
k := int(e.params.K)
|
||||
m := int(e.params.M)
|
||||
n := k + m
|
||||
// We need the data and parity blocks preserved in the same order. Missing blocks are set to nil.
|
||||
if len(encodedDataBlocks) != n {
|
||||
msg := fmt.Sprintf("Encoded data blocks slice must of length [%d]", n)
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
// Length of a single encoded block
|
||||
encodedBlockLen := GetEncodedBlockLen(dataLen, uint8(k))
|
||||
|
||||
// Keep track of errors per block.
|
||||
missingEncodedBlocks := make([]int, n+1)
|
||||
var missingEncodedBlocksCount int
|
||||
|
||||
// Check for the missing encoded blocks
|
||||
for i := range encodedDataBlocks {
|
||||
if encodedDataBlocks[i] == nil || len(encodedDataBlocks[i]) == 0 {
|
||||
missingEncodedBlocks[missingEncodedBlocksCount] = i
|
||||
missingEncodedBlocksCount++
|
||||
}
|
||||
}
|
||||
missingEncodedBlocks[missingEncodedBlocksCount] = -1
|
||||
missingEncodedBlocksCount++
|
||||
|
||||
// Cannot reconstruct original data. Need at least M number of data or parity blocks.
|
||||
if missingEncodedBlocksCount-1 > m {
|
||||
return nil, fmt.Errorf("Cannot reconstruct original data. Need at least [%d] data or parity blocks", m)
|
||||
}
|
||||
|
||||
// Convert from Go int slice to C int array
|
||||
missingEncodedBlocksC := intSlice2CIntArray(missingEncodedBlocks[:missingEncodedBlocksCount])
|
||||
|
||||
// Allocate buffer for the missing blocks
|
||||
for i := range encodedDataBlocks {
|
||||
if encodedDataBlocks[i] == nil || len(encodedDataBlocks[i]) == 0 {
|
||||
encodedDataBlocks[i] = make([]byte, encodedBlockLen)
|
||||
}
|
||||
}
|
||||
|
||||
// If not already initialized, recompute and cache
|
||||
if e.decodeMatrix == nil || e.decodeTbls == nil || e.decodeIndex == nil {
|
||||
var decodeMatrix, decodeTbls *C.uchar
|
||||
var decodeIndex *C.uint32_t
|
||||
|
||||
C.minio_init_decoder(missingEncodedBlocksC, C.int(k), C.int(n), C.int(missingEncodedBlocksCount-1),
|
||||
e.encodeMatrix, &decodeMatrix, &decodeTbls, &decodeIndex)
|
||||
|
||||
// cache this for future needs
|
||||
e.decodeMatrix = decodeMatrix
|
||||
e.decodeTbls = decodeTbls
|
||||
e.decodeIndex = decodeIndex
|
||||
}
|
||||
|
||||
// Make a slice of pointers to encoded blocks. Necessary to bridge to the C world.
|
||||
pointers := make([]*byte, n)
|
||||
for i := range encodedDataBlocks {
|
||||
pointers[i] = &encodedDataBlocks[i][0]
|
||||
}
|
||||
|
||||
// Get pointers to source "data" and target "parity" blocks from the output byte array.
|
||||
ret := C.minio_get_source_target(C.int(missingEncodedBlocksCount-1), C.int(k), C.int(m), missingEncodedBlocksC,
|
||||
e.decodeIndex, (**C.uchar)(unsafe.Pointer(&pointers[0])), &source, &target)
|
||||
if int(ret) == -1 {
|
||||
return nil, errors.New("Unable to decode data")
|
||||
}
|
||||
|
||||
// Decode data
|
||||
C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(missingEncodedBlocksCount-1), e.decodeTbls,
|
||||
source, target)
|
||||
|
||||
// Allocate buffer to output buffer
|
||||
decodedData = make([]byte, 0, encodedBlockLen*int(k))
|
||||
for i := 0; i < int(k); i++ {
|
||||
decodedData = append(decodedData, encodedDataBlocks[i]...)
|
||||
}
|
||||
|
||||
return decodedData[:dataLen], nil
|
||||
}
|
|
@ -1,197 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
// #cgo CFLAGS: -O0
|
||||
// #include <stdlib.h>
|
||||
// #include "ec_isal-l.h"
|
||||
// #include "ec_minio_common.h"
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Technique - type of matrix type used in encoding
type Technique uint8

// Different types of supported matrix types
const (
	// Vandermonde - Reed-Solomon style matrix.
	Vandermonde Technique = iota
	// Cauchy - Cauchy style matrix.
	Cauchy
	// None - no matrix type selected.
	None
)

// Default Data and Parity blocks
const (
	K = 10 // default number of data blocks
	M = 3  // default number of parity blocks
)

// Block alignment
const (
	// SIMDAlign - encoded block lengths are padded so each block is a
	// multiple of this many bytes (see GetEncodedBlockLen).
	SIMDAlign = 32
)
|
||||
|
||||
// ErasureParams is a configuration set for building an encoder. It is created using ValidateParams().
type ErasureParams struct {
	K         uint8     // number of data blocks
	M         uint8     // number of parity blocks
	Technique Technique // cauchy or vandermonde matrix (RS)
}

// Erasure is an object used to encode and decode data.
type Erasure struct {
	params *ErasureParams
	// Encode-side matrix/tables, allocated once in NewErasure.
	encodeMatrix, encodeTbls *C.uchar
	// Decode-side matrix/tables, computed lazily on first Decode.
	decodeMatrix, decodeTbls *C.uchar
	decodeIndex              *C.uint32_t
}
|
||||
|
||||
// ValidateParams creates an ErasureParams object.
|
||||
//
|
||||
// k and m represent the matrix size, which corresponds to the protection level
|
||||
// technique is the matrix type. Valid inputs are Cauchy (recommended) or Vandermonde.
|
||||
//
|
||||
func ValidateParams(k, m uint8, technique Technique) (*ErasureParams, error) {
|
||||
if k < 1 {
|
||||
return nil, errors.New("k cannot be zero")
|
||||
}
|
||||
|
||||
if m < 1 {
|
||||
return nil, errors.New("m cannot be zero")
|
||||
}
|
||||
|
||||
if k+m > 255 {
|
||||
return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1")
|
||||
}
|
||||
|
||||
switch technique {
|
||||
case Vandermonde:
|
||||
break
|
||||
case Cauchy:
|
||||
break
|
||||
default:
|
||||
return nil, errors.New("Technique can be either vandermonde or cauchy")
|
||||
}
|
||||
|
||||
return &ErasureParams{
|
||||
K: k,
|
||||
M: m,
|
||||
Technique: technique,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// NewErasure creates an encoder object with a given set of parameters.
func NewErasure(ep *ErasureParams) *Erasure {
	var k = C.int(ep.K)
	var m = C.int(ep.M)

	var encodeMatrix *C.uchar
	var encodeTbls *C.uchar

	// Build the encode matrix and its expanded multiplication tables
	// once; they are reused by every Encode call on this object.
	C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix,
		&encodeTbls)

	return &Erasure{
		params:       ep,
		encodeMatrix: encodeMatrix,
		encodeTbls:   encodeTbls,
		// Decode-side state is computed lazily on first Decode.
		decodeMatrix: nil,
		decodeTbls:   nil,
		decodeIndex:  nil,
	}
}
|
||||
|
||||
// GetEncodedBlocksLen - total length of all encoded blocks
|
||||
func GetEncodedBlocksLen(inputLen int, k, m uint8) (outputLen int) {
|
||||
outputLen = GetEncodedBlockLen(inputLen, k) * int(k+m)
|
||||
return outputLen
|
||||
}
|
||||
|
||||
// GetEncodedBlockLen - length per block of encoded blocks
|
||||
func GetEncodedBlockLen(inputLen int, k uint8) (encodedOutputLen int) {
|
||||
alignment := int(k) * SIMDAlign
|
||||
remainder := inputLen % alignment
|
||||
|
||||
paddedInputLen := inputLen
|
||||
if remainder != 0 {
|
||||
paddedInputLen = inputLen + (alignment - remainder)
|
||||
}
|
||||
encodedOutputLen = paddedInputLen / int(k)
|
||||
return encodedOutputLen
|
||||
}
|
||||
|
||||
// Encode erasure codes a block of data in "k" data blocks and "m" parity blocks.
// Output is [k+m][]blocks of data and parity slices.
func (e *Erasure) Encode(inputData []byte) (encodedBlocks [][]byte, err error) {
	k := int(e.params.K) // "k" data blocks
	m := int(e.params.M) // "m" parity blocks
	n := k + m           // "n" total encoded blocks

	// Length of a single encoded chunk.
	// Total number of encoded chunks = "k" data + "m" parity blocks
	encodedBlockLen := GetEncodedBlockLen(len(inputData), uint8(k))

	// Length of total number of "k" data chunks
	encodedDataBlocksLen := encodedBlockLen * k

	// Length of extra padding required for the data blocks.
	encodedDataBlocksPadLen := encodedDataBlocksLen - len(inputData)

	// Extend inputData buffer to accommodate coded data blocks if necessary
	if encodedDataBlocksPadLen > 0 {
		padding := make([]byte, encodedDataBlocksPadLen)
		// Expand with new padded blocks to the byte array
		inputData = append(inputData, padding...)
	}

	// Extend inputData buffer to accommodate coded parity blocks
	{ // Local Scope
		encodedParityBlocksLen := encodedBlockLen * m
		parityBlocks := make([]byte, encodedParityBlocksLen)
		inputData = append(inputData, parityBlocks...)
	}

	// Allocate memory to the "encoded blocks" return buffer
	encodedBlocks = make([][]byte, n) // Return buffer

	// Necessary to bridge Go to the C world. C requires a 2D array of
	// pointers to byte arrays; "encodedBlocks" is a 2D slice.
	pointersToEncodedBlock := make([]*byte, n) // Pointers to encoded blocks.

	// Copy data block slices to encoded block buffer. These are views into
	// the (padded) inputData, not copies.
	for i := 0; i < k; i++ {
		encodedBlocks[i] = inputData[i*encodedBlockLen : (i+1)*encodedBlockLen]
		pointersToEncodedBlock[i] = &encodedBlocks[i][0]
	}

	// Copy erasure block slices to encoded block buffer
	for i := k; i < n; i++ {
		encodedBlocks[i] = make([]byte, encodedBlockLen)
		pointersToEncodedBlock[i] = &encodedBlocks[i][0]
	}

	// Erasure code the data into K data blocks and M parity
	// blocks. Only the parity blocks are filled. Data blocks remain
	// intact.
	C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(m), e.encodeTbls,
		(**C.uchar)(unsafe.Pointer(&pointersToEncodedBlock[:k][0])), // Pointers to data blocks
		(**C.uchar)(unsafe.Pointer(&pointersToEncodedBlock[k:][0]))) // Pointers to parity blocks

	return encodedBlocks, nil
}
|
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_darwin.go
generated
vendored
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_darwin.go
generated
vendored
|
@ -1,43 +0,0 @@
|
|||
// !build amd64
|
||||
|
||||
package erasure
|
||||
|
||||
//go:generate yasm -f macho64 ec_multibinary.asm -o ec_multibinary.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_mad_avx2.asm -o gf_2vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_mad_avx.asm -o gf_2vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_mad_sse.asm -o gf_2vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_mad_avx2.asm -o gf_3vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_mad_avx.asm -o gf_3vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_mad_sse.asm -o gf_3vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_mad_avx2.asm -o gf_4vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_mad_avx.asm -o gf_4vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_mad_sse.asm -o gf_4vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_mad_avx2.asm -o gf_5vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_mad_avx.asm -o gf_5vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_mad_sse.asm -o gf_5vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_mad_avx2.asm -o gf_6vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_mad_avx.asm -o gf_6vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_mad_sse.asm -o gf_6vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_vect_mad_avx2.asm -o gf_vect_mad_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_vect_mad_avx.asm -o gf_vect_mad_avx.syso
|
||||
//go:generate yasm -f macho64 gf_vect_mad_sse.asm -o gf_vect_mad_sse.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_dot_prod_avx2.asm -o gf_2vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_dot_prod_avx.asm -o gf_2vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_2vect_dot_prod_sse.asm -o gf_2vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_dot_prod_avx2.asm -o gf_3vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_dot_prod_avx.asm -o gf_3vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_3vect_dot_prod_sse.asm -o gf_3vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_dot_prod_avx2.asm -o gf_4vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_dot_prod_avx.asm -o gf_4vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_4vect_dot_prod_sse.asm -o gf_4vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_dot_prod_avx2.asm -o gf_5vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_dot_prod_avx.asm -o gf_5vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_5vect_dot_prod_sse.asm -o gf_5vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_dot_prod_avx2.asm -o gf_6vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_dot_prod_avx.asm -o gf_6vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_6vect_dot_prod_sse.asm -o gf_6vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_vect_dot_prod_avx2.asm -o gf_vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f macho64 gf_vect_dot_prod_avx.asm -o gf_vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f macho64 gf_vect_dot_prod_sse.asm -o gf_vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f macho64 gf_vect_mul_avx.asm -o gf_vect_mul_avx.syso
|
||||
//go:generate yasm -f macho64 gf_vect_mul_sse.asm -o gf_vect_mul_sse.syso
|
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_linux.go
generated
vendored
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_linux.go
generated
vendored
|
@ -1,43 +0,0 @@
|
|||
// !build amd64
|
||||
|
||||
package erasure
|
||||
|
||||
//go:generate yasm -f elf64 ec_multibinary.asm -o ec_multibinary.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_mad_avx2.asm -o gf_2vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_mad_avx.asm -o gf_2vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_mad_sse.asm -o gf_2vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_mad_avx2.asm -o gf_3vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_mad_avx.asm -o gf_3vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_mad_sse.asm -o gf_3vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_mad_avx2.asm -o gf_4vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_mad_avx.asm -o gf_4vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_mad_sse.asm -o gf_4vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_mad_avx2.asm -o gf_5vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_mad_avx.asm -o gf_5vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_mad_sse.asm -o gf_5vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_mad_avx2.asm -o gf_6vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_mad_avx.asm -o gf_6vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_mad_sse.asm -o gf_6vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_vect_mad_avx2.asm -o gf_vect_mad_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_vect_mad_avx.asm -o gf_vect_mad_avx.syso
|
||||
//go:generate yasm -f elf64 gf_vect_mad_sse.asm -o gf_vect_mad_sse.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_dot_prod_avx2.asm -o gf_2vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_dot_prod_avx.asm -o gf_2vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_2vect_dot_prod_sse.asm -o gf_2vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_dot_prod_avx2.asm -o gf_3vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_dot_prod_avx.asm -o gf_3vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_3vect_dot_prod_sse.asm -o gf_3vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_dot_prod_avx2.asm -o gf_4vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_dot_prod_avx.asm -o gf_4vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_4vect_dot_prod_sse.asm -o gf_4vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_dot_prod_avx2.asm -o gf_5vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_dot_prod_avx.asm -o gf_5vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_5vect_dot_prod_sse.asm -o gf_5vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_dot_prod_avx2.asm -o gf_6vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_dot_prod_avx.asm -o gf_6vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_6vect_dot_prod_sse.asm -o gf_6vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_vect_dot_prod_avx2.asm -o gf_vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f elf64 gf_vect_dot_prod_avx.asm -o gf_vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f elf64 gf_vect_dot_prod_sse.asm -o gf_vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f elf64 gf_vect_mul_avx.asm -o gf_vect_mul_avx.syso
|
||||
//go:generate yasm -f elf64 gf_vect_mul_sse.asm -o gf_vect_mul_sse.syso
|
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_windows.go
generated
vendored
43
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/erasure_yasm_windows.go
generated
vendored
|
@ -1,43 +0,0 @@
|
|||
// !build amd64
|
||||
|
||||
package erasure
|
||||
|
||||
//go:generate yasm -f win64 ec_multibinary.asm -o ec_multibinary.syso
|
||||
//go:generate yasm -f win64 gf_2vect_mad_avx2.asm -o gf_2vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_2vect_mad_avx.asm -o gf_2vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_2vect_mad_sse.asm -o gf_2vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_3vect_mad_avx2.asm -o gf_3vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_3vect_mad_avx.asm -o gf_3vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_3vect_mad_sse.asm -o gf_3vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_4vect_mad_avx2.asm -o gf_4vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_4vect_mad_avx.asm -o gf_4vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_4vect_mad_sse.asm -o gf_4vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_5vect_mad_avx2.asm -o gf_5vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_5vect_mad_avx.asm -o gf_5vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_5vect_mad_sse.asm -o gf_5vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_6vect_mad_avx2.asm -o gf_6vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_6vect_mad_avx.asm -o gf_6vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_6vect_mad_sse.asm -o gf_6vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_vect_mad_avx2.asm -o gf_vect_mad_avx2.syso
|
||||
//go:generate yasm -f win64 gf_vect_mad_avx.asm -o gf_vect_mad_avx.syso
|
||||
//go:generate yasm -f win64 gf_vect_mad_sse.asm -o gf_vect_mad_sse.syso
|
||||
//go:generate yasm -f win64 gf_2vect_dot_prod_avx2.asm -o gf_2vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_2vect_dot_prod_avx.asm -o gf_2vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_2vect_dot_prod_sse.asm -o gf_2vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_3vect_dot_prod_avx2.asm -o gf_3vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_3vect_dot_prod_avx.asm -o gf_3vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_3vect_dot_prod_sse.asm -o gf_3vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_4vect_dot_prod_avx2.asm -o gf_4vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_4vect_dot_prod_avx.asm -o gf_4vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_4vect_dot_prod_sse.asm -o gf_4vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_5vect_dot_prod_avx2.asm -o gf_5vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_5vect_dot_prod_avx.asm -o gf_5vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_5vect_dot_prod_sse.asm -o gf_5vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_6vect_dot_prod_avx2.asm -o gf_6vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_6vect_dot_prod_avx.asm -o gf_6vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_6vect_dot_prod_sse.asm -o gf_6vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_vect_dot_prod_avx2.asm -o gf_vect_dot_prod_avx2.syso
|
||||
//go:generate yasm -f win64 gf_vect_dot_prod_avx.asm -o gf_vect_dot_prod_avx.syso
|
||||
//go:generate yasm -f win64 gf_vect_dot_prod_sse.asm -o gf_vect_dot_prod_sse.syso
|
||||
//go:generate yasm -f win64 gf_vect_mul_avx.asm -o gf_vect_mul_avx.syso
|
||||
//go:generate yasm -f win64 gf_vect_mul_sse.asm -o gf_vect_mul_sse.syso
|
381
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_avx.asm
generated
vendored
381
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_avx.asm
generated
vendored
|
@ -1,381 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r13, 3*16 + 1*8
|
||||
save_reg r14, 3*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r13, [rsp + 3*16 + 1*8]
|
||||
mov r14, [rsp + 3*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(0)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*1 ;1 local variable
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*1 ;1 local variable
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define dest2 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm8
|
||||
%define xgft1_lo xmm7
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xmm5
|
||||
%define xgft2_hi xmm4
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%else ;32-bit code
|
||||
%define xmask0f xmm4
|
||||
%define xgft1_lo xmm7
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_2VECT_DOT_PROD_AVX:function
|
||||
|
||||
func(GF_2VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_DOT_PROD_AVX, 02, 04, 0191
|
398
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_avx2.asm
generated
vendored
398
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,398 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r13, 3*16 + 1*8
|
||||
save_reg r14, 3*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r13, [rsp + 3*16 + 1*8]
|
||||
mov r14, [rsp + 3*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp.w edx
|
||||
%define tmp.b dl
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(0)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*1 ;1 local variable
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*1 ;1 local variable
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define dest2 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f ymm8
|
||||
%define xmask0fx xmm8
|
||||
%define xgft1_lo ymm7
|
||||
%define xgft1_hi ymm6
|
||||
%define xgft2_lo ymm5
|
||||
%define xgft2_hi ymm4
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%else ;32-bit code
|
||||
%define xmask0f ymm7
|
||||
%define xmask0fx xmm7
|
||||
%define xgft1_lo ymm5
|
||||
%define xgft1_hi ymm4
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_2VECT_DOT_PROD_AVX2:function
|
||||
|
||||
func(GF_2VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 32
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop32
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%else
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
%endif
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_DOT_PROD_AVX2, 04, 04, 0196
|
383
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_sse.asm
generated
vendored
383
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_dot_prod_sse.asm
generated
vendored
|
@ -1,383 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r13, 3*16 + 1*8
|
||||
save_reg r14, 3*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r13, [rsp + 3*16 + 1*8]
|
||||
mov r14, [rsp + 3*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(0)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*1 ;1 local variable
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*1 ;1 local variable
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define dest2 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm8
|
||||
%define xgft1_lo xmm7
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xmm5
|
||||
%define xgft2_hi xmm4
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%else ;32-bit code
|
||||
%define xmask0f xmm4
|
||||
%define xgft1_lo xmm7
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_2VECT_DOT_PROD_SSE:function
|
||||
|
||||
func(GF_2VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%ifidn PS,8 ;64-bit code
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_DOT_PROD_SSE, 00, 03, 0062
|
264
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_avx.asm
generated
vendored
264
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_avx.asm
generated
vendored
|
@ -1,264 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_MAD_AVX _gf_2vect_mad_avx
|
||||
%else
|
||||
%define GF_2VECT_MAD_AVX gf_2vect_mad_avx
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*9 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r15, 9*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r15, [rsp + 9*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp2
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm13
|
||||
%define xgft1_hi xmm12
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm10
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xd1 xmm6
|
||||
%define xd2 xmm7
|
||||
%define xtmpd1 xmm8
|
||||
%define xtmpd2 xmm9
|
||||
|
||||
|
||||
align 16
|
||||
global GF_2VECT_MAD_AVX:function
|
||||
|
||||
func(GF_2VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest
|
||||
XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest
|
||||
|
||||
.loop16
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
.loop16_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
vmovdqa xd1, xtmpd1 ;Restore xd1
|
||||
vmovdqa xd2, xtmpd2 ;Restore xd2
|
||||
jmp .loop16_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_MAD_AVX, 02, 00, 0204
|
280
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_avx2.asm
generated
vendored
280
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_avx2.asm
generated
vendored
|
@ -1,280 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_MAD_AVX2 _gf_2vect_mad_avx2
|
||||
%else
|
||||
%define GF_2VECT_MAD_AVX2 gf_2vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*9 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
vmovdqa [rsp+16*0],xmm6
|
||||
vmovdqa [rsp+16*1],xmm7
|
||||
vmovdqa [rsp+16*2],xmm8
|
||||
vmovdqa [rsp+16*3],xmm9
|
||||
vmovdqa [rsp+16*4],xmm10
|
||||
vmovdqa [rsp+16*5],xmm11
|
||||
vmovdqa [rsp+16*6],xmm12
|
||||
vmovdqa [rsp+16*7],xmm13
|
||||
vmovdqa [rsp+16*8],xmm14
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r15, 9*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp+16*0]
|
||||
vmovdqa xmm7, [rsp+16*1]
|
||||
vmovdqa xmm8, [rsp+16*2]
|
||||
vmovdqa xmm9, [rsp+16*3]
|
||||
vmovdqa xmm10, [rsp+16*4]
|
||||
vmovdqa xmm11, [rsp+16*5]
|
||||
vmovdqa xmm12, [rsp+16*6]
|
||||
vmovdqa xmm13, [rsp+16*7]
|
||||
vmovdqa xmm14, [rsp+16*8]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r15, [rsp + 9*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp2
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm14
|
||||
%define xmask0fx xmm14
|
||||
%define xgft1_lo ymm13
|
||||
%define xgft1_hi ymm12
|
||||
%define xgft2_lo ymm11
|
||||
%define xgft2_hi ymm10
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmph1 ymm2
|
||||
%define xtmpl1 ymm3
|
||||
%define xtmph2 ymm4
|
||||
%define xtmpl2 ymm5
|
||||
%define xd1 ymm6
|
||||
%define xd2 ymm7
|
||||
%define xtmpd1 ymm8
|
||||
%define xtmpd2 ymm9
|
||||
|
||||
align 16
|
||||
global GF_2VECT_MAD_AVX2:function
|
||||
|
||||
func(GF_2VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest1, [dest1]
|
||||
|
||||
XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest
|
||||
XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest
|
||||
|
||||
.loop32
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
.loop32_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
vmovdqa xd1, xtmpd1 ;Restore xd1
|
||||
vmovdqa xd2, xtmpd2 ;Restore xd2
|
||||
jmp .loop32_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_MAD_AVX2, 04, 00, 0205
|
267
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_sse.asm
generated
vendored
267
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_2vect_mad_sse.asm
generated
vendored
|
@ -1,267 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_MAD_SSE _gf_2vect_mad_sse
|
||||
%else
|
||||
%define GF_2VECT_MAD_SSE gf_2vect_mad_sse
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*9 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r15, 9*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r15, [rsp + 9*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp2
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm13
|
||||
%define xgft1_hi xmm12
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm10
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xd1 xmm6
|
||||
%define xd2 xmm7
|
||||
%define xtmpd1 xmm8
|
||||
%define xtmpd2 xmm9
|
||||
|
||||
|
||||
align 16
|
||||
global GF_2VECT_MAD_SSE:function
|
||||
func(GF_2VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
movdqu xgft1_lo,[tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest
|
||||
XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest
|
||||
|
||||
.loop16:
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
.loop16_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
movdqa xtmph1, xgft1_hi ;Reload const array registers
|
||||
movdqa xtmpl1, xgft1_lo
|
||||
movdqa xtmph2, xgft2_hi ;Reload const array registers
|
||||
movdqa xtmpl2, xgft2_lo
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result
|
||||
XSTR [dest2+pos], xd2 ;Store result
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
movdqa xd1, xtmpd1 ;Restore xd1
|
||||
movdqa xd2, xtmpd2 ;Restore xd2
|
||||
jmp .loop16_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_2VECT_MAD_SSE, 00, 00, 0203
|
421
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_avx.asm
generated
vendored
421
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_avx.asm
generated
vendored
|
@ -1,421 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_reg r12, 6*16 + 0*8
|
||||
save_reg r13, 6*16 + 1*8
|
||||
save_reg r14, 6*16 + 2*8
|
||||
save_reg r15, 6*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
mov r12, [rsp + 6*16 + 0*8]
|
||||
mov r13, [rsp + 6*16 + 1*8]
|
||||
mov r14, [rsp + 6*16 + 2*8]
|
||||
mov r15, [rsp + 6*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*2 ;2 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*2 ;2 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm11
|
||||
%define xgft1_lo xmm10
|
||||
%define xgft1_hi xmm9
|
||||
%define xgft2_lo xmm8
|
||||
%define xgft2_hi xmm7
|
||||
%define xgft3_lo xmm6
|
||||
%define xgft3_hi xmm5
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%else
|
||||
%define xmask0f xmm7
|
||||
%define xgft1_lo xmm6
|
||||
%define xgft1_hi xmm5
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_3VECT_DOT_PROD_AVX:function
|
||||
func(GF_3VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
sal vec, 1
|
||||
vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
sar vec, 1
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_DOT_PROD_AVX, 02, 04, 0192
|
441
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_avx2.asm
generated
vendored
441
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,441 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
save_reg r12, 6*16 + 0*8
|
||||
save_reg r13, 6*16 + 1*8
|
||||
save_reg r14, 6*16 + 2*8
|
||||
save_reg r15, 6*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
mov r12, [rsp + 6*16 + 0*8]
|
||||
mov r13, [rsp + 6*16 + 1*8]
|
||||
mov r14, [rsp + 6*16 + 2*8]
|
||||
mov r15, [rsp + 6*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp.w edx
|
||||
%define tmp.b dl
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*2 ;2 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*2 ;2 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f ymm11
|
||||
%define xmask0fx xmm11
|
||||
%define xgft1_lo ymm10
|
||||
%define xgft1_hi ymm9
|
||||
%define xgft2_lo ymm8
|
||||
%define xgft2_hi ymm7
|
||||
%define xgft3_lo ymm6
|
||||
%define xgft3_hi ymm5
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%else
|
||||
%define xmask0f ymm7
|
||||
%define xmask0fx xmm7
|
||||
%define xgft1_lo ymm6
|
||||
%define xgft1_hi ymm5
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_3VECT_DOT_PROD_AVX2:function
|
||||
func(GF_3VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 32
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop32:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
sal vec, 1
|
||||
vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
|
||||
sar vec, 1
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_DOT_PROD_AVX2, 04, 04, 0197
|
422
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_sse.asm
generated
vendored
422
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_dot_prod_sse.asm
generated
vendored
|
@ -1,422 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_reg r12, 6*16 + 0*8
|
||||
save_reg r13, 6*16 + 1*8
|
||||
save_reg r14, 6*16 + 2*8
|
||||
save_reg r15, 6*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
mov r12, [rsp + 6*16 + 0*8]
|
||||
mov r13, [rsp + 6*16 + 1*8]
|
||||
mov r14, [rsp + 6*16 + 2*8]
|
||||
mov r15, [rsp + 6*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*2 ;2 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*2 ;2 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm11
|
||||
%define xgft1_lo xmm2
|
||||
%define xgft1_hi xmm3
|
||||
%define xgft2_lo xmm4
|
||||
%define xgft2_hi xmm7
|
||||
%define xgft3_lo xmm6
|
||||
%define xgft3_hi xmm5
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm10
|
||||
%define xp2 xmm9
|
||||
%define xp3 xmm8
|
||||
%else
|
||||
%define xmask0f xmm7
|
||||
%define xgft1_lo xmm6
|
||||
%define xgft1_hi xmm5
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%endif
|
||||
|
||||
align 16
|
||||
global GF_3VECT_DOT_PROD_SSE:function
|
||||
func(GF_3VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16:
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%ifidn PS,8 ;64-bit code
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
%endif
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
sal vec, 1
|
||||
movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
sar vec, 1
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_DOT_PROD_SSE, 00, 05, 0063
|
315
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_avx.asm
generated
vendored
315
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_avx.asm
generated
vendored
|
@ -1,315 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_MAD_AVX _gf_3vect_mad_avx
|
||||
%else
|
||||
%define GF_3VECT_MAD_AVX gf_3vect_mad_avx
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
vmovdqa [rsp+16*0],xmm6
|
||||
vmovdqa [rsp+16*1],xmm7
|
||||
vmovdqa [rsp+16*2],xmm8
|
||||
vmovdqa [rsp+16*3],xmm9
|
||||
vmovdqa [rsp+16*4],xmm10
|
||||
vmovdqa [rsp+16*5],xmm11
|
||||
vmovdqa [rsp+16*6],xmm12
|
||||
vmovdqa [rsp+16*7],xmm13
|
||||
vmovdqa [rsp+16*8],xmm14
|
||||
vmovdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r15, 10*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp+16*0]
|
||||
vmovdqa xmm7, [rsp+16*1]
|
||||
vmovdqa xmm8, [rsp+16*2]
|
||||
vmovdqa xmm9, [rsp+16*3]
|
||||
vmovdqa xmm10, [rsp+16*4]
|
||||
vmovdqa xmm11, [rsp+16*5]
|
||||
vmovdqa xmm12, [rsp+16*6]
|
||||
vmovdqa xmm13, [rsp+16*7]
|
||||
vmovdqa xmm14, [rsp+16*8]
|
||||
vmovdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r15, [rsp + 10*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xd1 xmm8
|
||||
%define xd2 xtmpl1
|
||||
%define xd3 xtmph1
|
||||
|
||||
align 16
|
||||
global GF_3VECT_MAD_AVX:function
|
||||
func(GF_3VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xgft3_hi, [tmp+2*vec+16]; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph3 ;xd3 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
XSTR [dest3+pos], xd3
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
movdqa xtmph3, [constip16] ;Load const of i + 16
|
||||
vpinsrb xtmpl3, xtmpl3, len.w, 15
|
||||
vpshufb xtmpl3, xtmpl3, xmask0f ;Broadcast len to all bytes
|
||||
vpcmpgtb xtmpl3, xtmpl3, xtmph3
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpand xgft1_hi, xgft1_hi, xtmpl3
|
||||
vpxor xd1, xd1, xgft1_hi
|
||||
|
||||
; dest2
|
||||
vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpand xgft2_hi, xgft2_hi, xtmpl3
|
||||
vpxor xd2, xd2, xgft2_hi
|
||||
|
||||
; dest3
|
||||
vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpand xgft3_hi, xgft3_hi, xtmpl3
|
||||
vpxor xd3, xd3, xgft3_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1
|
||||
XSTR [dest2+tmp], xd2
|
||||
XSTR [dest3+tmp], xd3
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_MAD_AVX, 02, 00, 0207
|
347
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_avx2.asm
generated
vendored
347
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_avx2.asm
generated
vendored
|
@ -1,347 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_MAD_AVX2 _gf_3vect_mad_avx2
|
||||
%else
|
||||
%define GF_3VECT_MAD_AVX2 gf_3vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
vmovdqa [rsp+16*0],xmm6
|
||||
vmovdqa [rsp+16*1],xmm7
|
||||
vmovdqa [rsp+16*2],xmm8
|
||||
vmovdqa [rsp+16*3],xmm9
|
||||
vmovdqa [rsp+16*4],xmm10
|
||||
vmovdqa [rsp+16*5],xmm11
|
||||
vmovdqa [rsp+16*6],xmm12
|
||||
vmovdqa [rsp+16*7],xmm13
|
||||
vmovdqa [rsp+16*8],xmm14
|
||||
vmovdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r15, 10*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp+16*0]
|
||||
vmovdqa xmm7, [rsp+16*1]
|
||||
vmovdqa xmm8, [rsp+16*2]
|
||||
vmovdqa xmm9, [rsp+16*3]
|
||||
vmovdqa xmm10, [rsp+16*4]
|
||||
vmovdqa xmm11, [rsp+16*5]
|
||||
vmovdqa xmm12, [rsp+16*6]
|
||||
vmovdqa xmm13, [rsp+16*7]
|
||||
vmovdqa xmm14, [rsp+16*8]
|
||||
vmovdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r15, [rsp + 10*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft1_hi ymm13
|
||||
%define xgft2_lo ymm12
|
||||
%define xgft3_lo ymm11
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmph1 ymm2
|
||||
%define xtmpl1 ymm3
|
||||
%define xtmph2 ymm4
|
||||
%define xtmpl2 ymm5
|
||||
%define xtmpl2x xmm5
|
||||
%define xtmph3 ymm6
|
||||
%define xtmpl3 ymm7
|
||||
%define xtmpl3x xmm7
|
||||
%define xd1 ymm8
|
||||
%define xd2 ymm9
|
||||
%define xd3 ymm10
|
||||
|
||||
align 16
|
||||
global GF_3VECT_MAD_AVX2:function
|
||||
func(GF_3VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;Get next dest vector
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xtmpl2, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xtmpl3, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xtmph3 ;xd3 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
XSTR [dest3+pos], xd3
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan32:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp.b, 0x1f
|
||||
vpinsrb xtmpl2x, xtmpl2x, tmp.w, 0
|
||||
vpbroadcastb xtmpl2, xtmpl2x ;Construct mask 0x1f1f1f...
|
||||
|
||||
mov tmp, len ;Overlapped offset length-32
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmph3, [constip32] ;Load const of i + 32
|
||||
vpinsrb xtmpl3x, xtmpl3x, len.w, 15
|
||||
vinserti128 xtmpl3, xtmpl3, xtmpl3x, 1 ;swapped to xtmpl3x | xtmpl3x
|
||||
vpshufb xtmpl3, xtmpl3, xtmpl2 ;Broadcast len to all bytes. xtmpl2=0x1f1f1f...
|
||||
vpcmpgtb xtmpl3, xtmpl3, xtmph3
|
||||
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmpl3
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmpl3
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials
|
||||
vpand xtmph3, xtmph3, xtmpl3
|
||||
vpxor xd3, xd3, xtmph3 ;xd3 += partial
|
||||
|
||||
XSTR [dest1+tmp], xd1
|
||||
XSTR [dest2+tmp], xd2
|
||||
XSTR [dest3+tmp], xd3
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 32
|
||||
constip32:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
ddq 0xe0e1e2e3e4e5e6e7e8e9eaebecedeeef
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_MAD_AVX2, 04, 00, 0208
|
326
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_sse.asm
generated
vendored
326
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_3vect_mad_sse.asm
generated
vendored
|
@ -1,326 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_MAD_SSE _gf_3vect_mad_sse
|
||||
%else
|
||||
%define GF_3VECT_MAD_SSE gf_3vect_mad_sse
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r15, 10*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r15, [rsp + 10*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xd1 xmm8
|
||||
%define xd2 xtmpl1
|
||||
%define xd3 xtmph1
|
||||
|
||||
align 16
|
||||
global GF_3VECT_MAD_SSE:function
|
||||
func(GF_3VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5
|
||||
lea tmp, [mul_array + vec_i]
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xgft3_hi, [tmp+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
movdqa xtmph1, xgft1_hi ;Reload const array registers
|
||||
movdqa xtmpl1, xgft1_lo
|
||||
movdqa xtmph2, xgft2_hi ;Reload const array registers
|
||||
movdqa xtmpl2, xgft2_lo
|
||||
movdqa xtmph3, xgft3_hi ;Reload const array registers
|
||||
movdqa xtmpl3, xgft3_lo
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
; dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
; dest3
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pxor xd3, xtmph3
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result
|
||||
XSTR [dest2+pos], xd2 ;Store result
|
||||
XSTR [dest3+pos], xd3 ;Store result
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
movdqa xtmph3, [constip16] ;Load const of i + 16
|
||||
pinsrb xtmpl3, len.w, 15
|
||||
pshufb xtmpl3, xmask0f ;Broadcast len to all bytes
|
||||
pcmpgtb xtmpl3, xtmph3
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pand xgft1_hi, xtmpl3
|
||||
pxor xd1, xgft1_hi
|
||||
|
||||
; dest2
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pand xgft2_hi, xtmpl3
|
||||
pxor xd2, xgft2_hi
|
||||
|
||||
; dest3
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pand xgft3_hi, xtmpl3
|
||||
pxor xd3, xgft3_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result
|
||||
XSTR [dest2+tmp], xd2 ;Store result
|
||||
XSTR [dest3+tmp], xd3 ;Store result
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_3VECT_MAD_SSE, 00, 00, 0206
|
489
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_avx.asm
generated
vendored
489
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_avx.asm
generated
vendored
|
@ -1,489 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r13, 9*16 + 1*8
|
||||
save_reg r14, 9*16 + 2*8
|
||||
save_reg r15, 9*16 + 3*8
|
||||
save_reg rdi, 9*16 + 4*8
|
||||
save_reg rsi, 9*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r13, [rsp + 9*16 + 1*8]
|
||||
mov r14, [rsp + 9*16 + 2*8]
|
||||
mov r15, [rsp + 9*16 + 3*8]
|
||||
mov rdi, [rsp + 9*16 + 4*8]
|
||||
mov rsi, [rsp + 9*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; var2
|
||||
;;; var3
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define tmp5 trans2
|
||||
%define tmp5_m var(2)
|
||||
%define tmp6 trans2
|
||||
%define tmp6_m var(3)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*4 ;4 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*4 ;4 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%define dest4_m tmp5_m
|
||||
%define vskip3_m tmp6_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm13
|
||||
%define xgft1_hi xmm12
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm10
|
||||
%define xgft3_lo xmm9
|
||||
%define xgft3_hi xmm8
|
||||
%define xgft4_lo xmm7
|
||||
%define xgft4_hi xmm6
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%else
|
||||
%define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo
|
||||
%define xmask0f xmm_trans
|
||||
%define xgft1_lo xmm_trans
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
%define xgft4_lo xgft1_lo
|
||||
%define xgft4_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%endif
|
||||
align 16
|
||||
global GF_4VECT_DOT_PROD_AVX:function
|
||||
func(GF_4VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
SSTR vskip3_m, vskip3
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest4, [dest1+3*PS]
|
||||
SSTR dest4_m, dest4
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
%else ;32-bit code
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%endif
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
sal vec, 1
|
||||
vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
sar vec, 1
|
||||
%endif
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
SLDR vskip3, vskip3_m
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
SLDR dest4, dest4_m
|
||||
XSTR [dest4+pos], xp4
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_DOT_PROD_AVX, 02, 04, 0193
|
510
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_avx2.asm
generated
vendored
510
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,510 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r13, 9*16 + 1*8
|
||||
save_reg r14, 9*16 + 2*8
|
||||
save_reg r15, 9*16 + 3*8
|
||||
save_reg rdi, 9*16 + 4*8
|
||||
save_reg rsi, 9*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r13, [rsp + 9*16 + 1*8]
|
||||
mov r14, [rsp + 9*16 + 2*8]
|
||||
mov r15, [rsp + 9*16 + 3*8]
|
||||
mov rdi, [rsp + 9*16 + 4*8]
|
||||
mov rsi, [rsp + 9*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; var2
|
||||
;;; var3
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp.w edx
|
||||
%define tmp.b dl
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define tmp5 trans2
|
||||
%define tmp5_m var(2)
|
||||
%define tmp6 trans2
|
||||
%define tmp6_m var(3)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*4 ;4 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*4 ;4 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%define dest4_m tmp5_m
|
||||
%define vskip3_m tmp6_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f ymm14
|
||||
%define xmask0fx xmm14
|
||||
%define xgft1_lo ymm13
|
||||
%define xgft1_hi ymm12
|
||||
%define xgft2_lo ymm11
|
||||
%define xgft2_hi ymm10
|
||||
%define xgft3_lo ymm9
|
||||
%define xgft3_hi ymm8
|
||||
%define xgft4_lo ymm7
|
||||
%define xgft4_hi ymm6
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%else
|
||||
%define ymm_trans ymm7 ;reuse xmask0f and xgft1_hi
|
||||
%define xmask0f ymm_trans
|
||||
%define xmask0fx xmm7
|
||||
%define xgft1_lo ymm6
|
||||
%define xgft1_hi ymm_trans
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
%define xgft4_lo xgft1_lo
|
||||
%define xgft4_hi xgft1_hi
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%endif
|
||||
align 16
|
||||
global GF_4VECT_DOT_PROD_AVX2:function
|
||||
func(GF_4VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 32
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
SSTR vskip3_m, vskip3
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest4, [dest1+3*PS]
|
||||
SSTR dest4_m, dest4
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop32:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
add vec_i, PS
|
||||
%ifidn PS,8 ;64-bit code
|
||||
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
add tmp, 32
|
||||
%else ;32-bit code
|
||||
mov cl, 0x0f ;use ecx as a temp variable
|
||||
vpinsrb xmask0fx, xmask0fx, ecx, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
%endif
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
%endif
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
sal vec, 1
|
||||
vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
sar vec, 1
|
||||
%endif
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
%ifidn PS,4 ; 32-bit code
|
||||
SLDR vskip3, vskip3_m
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " DX{00}, Dx{10}, ..., Dx{f0}
|
||||
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
add tmp, 32
|
||||
%endif
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
SLDR dest4, dest4_m
|
||||
XSTR [dest4+pos], xp4
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_DOT_PROD_AVX2, 04, 04, 0198
|
491
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_sse.asm
generated
vendored
491
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_dot_prod_sse.asm
generated
vendored
|
@ -1,491 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r13, 9*16 + 1*8
|
||||
save_reg r14, 9*16 + 2*8
|
||||
save_reg r15, 9*16 + 3*8
|
||||
save_reg rdi, 9*16 + 4*8
|
||||
save_reg rsi, 9*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm12, [rsp + 6*16]
|
||||
movdqa xmm13, [rsp + 7*16]
|
||||
movdqa xmm14, [rsp + 8*16]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r13, [rsp + 9*16 + 1*8]
|
||||
mov r14, [rsp + 9*16 + 2*8]
|
||||
mov r15, [rsp + 9*16 + 3*8]
|
||||
mov rdi, [rsp + 9*16 + 4*8]
|
||||
mov rsi, [rsp + 9*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; var0
|
||||
;;; var1
|
||||
;;; var2
|
||||
;;; var3
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
%define var(x) [ebp - PS - PS*x]
|
||||
|
||||
%define trans ecx
|
||||
%define trans2 esi
|
||||
%define arg0 trans ;trans and trans2 are for the variables in stack
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 ebx
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 trans
|
||||
%define arg3_m arg(3)
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define arg5 trans2
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 trans2
|
||||
%define tmp3_m var(0)
|
||||
%define tmp4 trans2
|
||||
%define tmp4_m var(1)
|
||||
%define tmp5 trans2
|
||||
%define tmp5_m var(2)
|
||||
%define tmp6 trans2
|
||||
%define tmp6_m var(3)
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
sub esp, PS*4 ;4 local variables
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg1, arg(1)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
add esp, PS*4 ;4 local variables
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define len_m arg0_m
|
||||
%define src_m arg3_m
|
||||
%define dest1_m arg4_m
|
||||
%define dest2_m tmp3_m
|
||||
%define dest3_m tmp4_m
|
||||
%define dest4_m tmp5_m
|
||||
%define vskip3_m tmp6_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
|
||||
section .text
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm2
|
||||
%define xgft1_hi xmm3
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm4
|
||||
%define xgft3_lo xmm9
|
||||
%define xgft3_hi xmm5
|
||||
%define xgft4_lo xmm7
|
||||
%define xgft4_hi xmm6
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm8
|
||||
%define xp2 xmm10
|
||||
%define xp3 xmm12
|
||||
%define xp4 xmm13
|
||||
%else
|
||||
%define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo
|
||||
%define xmask0f xmm_trans
|
||||
%define xgft1_lo xmm_trans
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xgft1_lo
|
||||
%define xgft2_hi xgft1_hi
|
||||
%define xgft3_lo xgft1_lo
|
||||
%define xgft3_hi xgft1_hi
|
||||
%define xgft4_lo xgft1_lo
|
||||
%define xgft4_hi xgft1_hi
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%endif
|
||||
align 16
|
||||
global GF_4VECT_DOT_PROD_SSE:function
|
||||
func(GF_4VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
SSTR vskip3_m, vskip3
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
SLDR dest1, dest1_m
|
||||
mov dest2, [dest1+PS]
|
||||
SSTR dest2_m, dest2
|
||||
mov dest3, [dest1+2*PS]
|
||||
SSTR dest3_m, dest3
|
||||
mov dest4, [dest1+3*PS]
|
||||
SSTR dest4_m, dest4
|
||||
mov dest1, [dest1]
|
||||
SSTR dest1_m, dest1
|
||||
|
||||
.loop16:
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
pxor xp4, xp4
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
SLDR src, src_m
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
%else ;32-bit code
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
%endif
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
%endif
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
sal vec, 1
|
||||
movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
sar vec, 1
|
||||
%endif
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
SLDR vskip3, vskip3_m
|
||||
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
add tmp, 32
|
||||
add vec_i, PS
|
||||
%endif
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest1, dest1_m
|
||||
SLDR dest2, dest2_m
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
SLDR dest3, dest3_m
|
||||
XSTR [dest3+pos], xp3
|
||||
SLDR dest4, dest4_m
|
||||
XSTR [dest4+pos], xp4
|
||||
|
||||
SLDR len, len_m
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_DOT_PROD_SSE, 00, 05, 0064
|
370
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_avx.asm
generated
vendored
370
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_avx.asm
generated
vendored
|
@ -1,370 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_MAD_AVX _gf_4vect_mad_avx
|
||||
%else
|
||||
%define GF_4VECT_MAD_AVX gf_4vect_mad_avx
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r15, 10*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r15, [rsp + 10*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 tmp2
|
||||
%define dest4 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft3_hi xmm14
|
||||
%define xgft4_hi xmm13
|
||||
%define xgft4_lo xmm12
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xtmph4 xmm8
|
||||
%define xtmpl4 xmm9
|
||||
%define xd1 xmm10
|
||||
%define xd2 xmm11
|
||||
%define xd3 xtmph1
|
||||
%define xd4 xtmpl1
|
||||
|
||||
align 16
|
||||
global GF_4VECT_MAD_AVX:function
|
||||
func(GF_4VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
mov tmp, vec
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
vmovdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
sal vec, 5 ;Multiply by 32
|
||||
add tmp, vec
|
||||
vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph3
|
||||
|
||||
; dest4
|
||||
vpshufb xtmph4, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl4, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph4, xtmph4, xtmpl4 ;GF add high and low partials
|
||||
vpxor xd4, xd4, xtmph4
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result
|
||||
XSTR [dest2+pos], xd2 ;Store result
|
||||
XSTR [dest3+pos], xd3 ;Store result
|
||||
XSTR [dest4+pos], xd4 ;Store result
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xtmph4, [dest3+tmp] ;Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmpl4, [constip16] ;Load const of i + 16
|
||||
vpinsrb xtmph3, xtmph3, len.w, 15
|
||||
vpshufb xtmph3, xtmph3, xmask0f ;Broadcast len to all bytes
|
||||
vpcmpgtb xtmph3, xtmph3, xtmpl4
|
||||
|
||||
XLDR xtmpl4, [dest4+tmp] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmph3
|
||||
vpxor xd1, xd1, xtmph1
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmph3
|
||||
vpxor xd2, xd2, xtmph2
|
||||
|
||||
; dest3
|
||||
vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_hi, xtmpl3 ;GF add high and low partials
|
||||
vpand xgft3_hi, xgft3_hi, xtmph3
|
||||
vpxor xtmph4, xtmph4, xgft3_hi
|
||||
|
||||
; dest4
|
||||
vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpand xgft4_hi, xgft4_hi, xtmph3
|
||||
vpxor xtmpl4, xtmpl4, xgft4_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result
|
||||
XSTR [dest2+tmp], xd2 ;Store result
|
||||
XSTR [dest3+tmp], xtmph4 ;Store result
|
||||
XSTR [dest4+tmp], xtmpl4 ;Store result
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_MAD_AVX, 02, 00, 020a
|
371
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_avx2.asm
generated
vendored
371
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_avx2.asm
generated
vendored
|
@ -1,371 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_MAD_AVX2 _gf_4vect_mad_avx2
|
||||
%else
|
||||
%define GF_4VECT_MAD_AVX2 gf_4vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r15, 10*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r15, [rsp + 10*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
|
||||
;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 vec
|
||||
%define dest4 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft2_lo ymm13
|
||||
%define xgft3_lo ymm12
|
||||
%define xgft4_lo ymm11
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmpl ymm2
|
||||
%define xtmplx xmm2
|
||||
%define xtmph1 ymm3
|
||||
%define xtmph1x xmm3
|
||||
%define xtmph2 ymm4
|
||||
%define xtmph3 ymm5
|
||||
%define xtmph4 ymm6
|
||||
%define xd1 ymm7
|
||||
%define xd2 ymm8
|
||||
%define xd3 ymm9
|
||||
%define xd4 ymm10
|
||||
|
||||
align 16
|
||||
global GF_4VECT_MAD_AVX2:function
|
||||
func(GF_4VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5 ;Multiply by 32
|
||||
lea tmp, [mul_array + vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
add tmp, vec
|
||||
vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS] ; reuse vec
|
||||
mov dest4, [dest1+3*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;reuse xtmpl1. Get next dest vector
|
||||
|
||||
vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph3 ;xd3 += partial
|
||||
|
||||
; dest4
|
||||
vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph4, xtmph4, xtmpl ;GF add high and low partials
|
||||
vpxor xd4, xd4, xtmph4 ;xd4 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
XSTR [dest3+pos], xd3
|
||||
XSTR [dest4+pos], xd4
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan32:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp.b, 0x1f
|
||||
vpinsrb xtmph1x, xtmph1x, tmp.w, 0
|
||||
vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f...
|
||||
|
||||
mov tmp, len ;Overlapped offset length-32
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;Get next dest vector
|
||||
XLDR xd4, [dest4+tmp] ;Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmph2, [constip32] ;Load const of i + 32
|
||||
vpinsrb xtmplx, xtmplx, len.w, 15
|
||||
vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx
|
||||
vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f...
|
||||
vpcmpgtb xtmpl, xtmpl, xtmph2
|
||||
|
||||
vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmpl
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmpl
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials
|
||||
vpand xtmph3, xtmph3, xtmpl
|
||||
vpxor xd3, xd3, xtmph3 ;xd3 += partial
|
||||
|
||||
; dest4
|
||||
vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph4, xtmph4, xgft4_lo ;GF add high and low partials
|
||||
vpand xtmph4, xtmph4, xtmpl
|
||||
vpxor xd4, xd4, xtmph4 ;xd4 += partial
|
||||
|
||||
XSTR [dest1+tmp], xd1
|
||||
XSTR [dest2+tmp], xd2
|
||||
XSTR [dest3+tmp], xd3
|
||||
XSTR [dest4+tmp], xd4
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
align 32
|
||||
constip32:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
ddq 0xe0e1e2e3e4e5e6e7e8e9eaebecedeeef
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_MAD_AVX2, 04, 00, 020b
|
375
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_sse.asm
generated
vendored
375
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_4vect_mad_sse.asm
generated
vendored
|
@ -1,375 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_MAD_SSE _gf_4vect_mad_sse
|
||||
%else
|
||||
%define GF_4VECT_MAD_SSE gf_4vect_mad_sse
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r15, 10*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r15, [rsp + 10*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 tmp2
|
||||
%define dest4 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft3_hi xmm14
|
||||
%define xgft4_hi xmm13
|
||||
%define xgft4_lo xmm12
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xtmph4 xmm8
|
||||
%define xtmpl4 xmm9
|
||||
%define xd1 xmm10
|
||||
%define xd2 xmm11
|
||||
%define xd3 xtmph1
|
||||
%define xd4 xtmpl1
|
||||
|
||||
align 16
|
||||
global GF_4VECT_MAD_SSE:function
|
||||
func(GF_4VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov tmp, vec
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
|
||||
movdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
sal vec, 5 ;Multiply by 32
|
||||
add tmp, vec
|
||||
movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
|
||||
mov dest2, [dest1+PS] ; reuse mul_array
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
|
||||
movdqa xtmph3, xgft3_hi
|
||||
movdqa xtmpl4, xgft4_lo
|
||||
movdqa xtmph4, xgft4_hi
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector
|
||||
|
||||
; dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
; dest3
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pxor xd3, xtmph3
|
||||
|
||||
; dest4
|
||||
pshufb xtmph4, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl4, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph4, xtmpl4 ;GF add high and low partials
|
||||
pxor xd4, xtmph4
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result
|
||||
XSTR [dest2+pos], xd2 ;Store result
|
||||
XSTR [dest3+pos], xd3 ;Store result
|
||||
XSTR [dest4+pos], xd4 ;Store result
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xtmph4, [dest3+tmp] ;Reuse xtmph1. Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
movdqa xtmpl4, [constip16] ;Load const of i + 16
|
||||
pinsrb xtmph3, len.w, 15
|
||||
pshufb xtmph3, xmask0f ;Broadcast len to all bytes
|
||||
pcmpgtb xtmph3, xtmpl4
|
||||
|
||||
XLDR xtmpl4, [dest4+tmp] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pand xtmph1, xtmph3
|
||||
pxor xd1, xtmph1
|
||||
|
||||
; dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pand xtmph2, xtmph3
|
||||
pxor xd2, xtmph2
|
||||
|
||||
; dest3
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xtmpl3 ;GF add high and low partials
|
||||
pand xgft3_hi, xtmph3
|
||||
pxor xtmph4, xgft3_hi
|
||||
|
||||
; dest4
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pand xgft4_hi, xtmph3
|
||||
pxor xtmpl4, xgft4_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result
|
||||
XSTR [dest2+tmp], xd2 ;Store result
|
||||
XSTR [dest3+tmp], xtmph4 ;Store result
|
||||
XSTR [dest4+tmp], xtmpl4 ;Store result
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_4VECT_MAD_SSE, 00, 00, 0209
|
348
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_avx.asm
generated
vendored
348
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_avx.asm
generated
vendored
|
@ -1,348 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
%define xgft4_lo xmm8
|
||||
%define xgft4_hi xmm7
|
||||
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
|
||||
align 16
|
||||
global GF_5VECT_DOT_PROD_AVX:function
|
||||
func(GF_5VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
add tmp, 32
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_DOT_PROD_AVX, 02, 03, 0194
|
362
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_avx2.asm
generated
vendored
362
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,362 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
vmovdqa [rsp + 9*16], xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft1_hi ymm13
|
||||
%define xgft2_lo ymm12
|
||||
%define xgft2_hi ymm11
|
||||
%define xgft3_lo ymm10
|
||||
%define xgft3_hi ymm9
|
||||
%define xgft4_lo ymm8
|
||||
%define xgft4_hi ymm7
|
||||
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%define xp5 ymm6
|
||||
|
||||
align 16
|
||||
global GF_5VECT_DOT_PROD_AVX2:function
|
||||
func(GF_5VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop32:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add vec_i, PS
|
||||
|
||||
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
add tmp, 32
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199
|
349
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_sse.asm
generated
vendored
349
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_dot_prod_sse.asm
generated
vendored
|
@ -1,349 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm12, [rsp + 6*16]
|
||||
movdqa xmm13, [rsp + 7*16]
|
||||
movdqa xmm14, [rsp + 8*16]
|
||||
movdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm2
|
||||
%define xgft1_hi xmm3
|
||||
%define xgft2_lo xmm4
|
||||
%define xgft2_hi xmm5
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm6
|
||||
%define xgft4_lo xmm8
|
||||
%define xgft4_hi xmm7
|
||||
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm9
|
||||
%define xp2 xmm11
|
||||
%define xp3 xmm12
|
||||
%define xp4 xmm13
|
||||
%define xp5 xmm14
|
||||
|
||||
align 16
|
||||
global GF_5VECT_DOT_PROD_SSE:function
|
||||
func(GF_5VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
pxor xp4, xp4
|
||||
pxor xp5, xp5
|
||||
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
add tmp, 32
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_DOT_PROD_SSE, 00, 04, 0065
|
401
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_avx.asm
generated
vendored
401
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_avx.asm
generated
vendored
|
@ -1,401 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_MAD_AVX _gf_5vect_mad_avx
|
||||
%else
|
||||
%define GF_5VECT_MAD_AVX gf_5vect_mad_avx
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define tmp4 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 5*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp4
|
||||
%define dest3 mul_array
|
||||
%define dest4 tmp2
|
||||
%define dest5 vec_i
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft5_hi xmm14
|
||||
%define xgft4_lo xmm13
|
||||
%define xgft4_hi xmm12
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xtmph5 xmm8
|
||||
%define xtmpl5 xmm9
|
||||
%define xd1 xmm10
|
||||
%define xd2 xmm11
|
||||
%define xd3 xtmpl1
|
||||
%define xd4 xtmph1
|
||||
%define xd5 xtmpl2
|
||||
|
||||
|
||||
align 16
|
||||
global GF_5VECT_MAD_AVX:function
|
||||
func(GF_5VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov tmp, vec
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
sal vec, 5 ;Multiply by 32
|
||||
add tmp, vec
|
||||
vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
|
||||
mov dest3, [dest1+2*PS] ; reuse mul_array
|
||||
mov dest4, [dest1+3*PS]
|
||||
mov dest5, [dest1+4*PS] ; reuse vec_i
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Reuse xtmph1, Get next dest vector
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2
|
||||
|
||||
XLDR xd5, [dest5+pos] ;Reuse xtmpl2. Get next dest vector
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph3
|
||||
|
||||
; dest4
|
||||
vpshufb xtmph2, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd4, xd4, xtmph2
|
||||
|
||||
; dest5
|
||||
vpshufb xtmph5, xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph5, xtmph5, xtmpl5 ;GF add high and low partials
|
||||
vpxor xd5, xd5, xtmph5
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result into dest1
|
||||
XSTR [dest2+pos], xd2 ;Store result into dest2
|
||||
XSTR [dest3+pos], xd3 ;Store result into dest3
|
||||
XSTR [dest4+pos], xd4 ;Store result into dest4
|
||||
XSTR [dest5+pos], xd5 ;Store result into dest5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmph1, [constip16] ;Load const of i + 16
|
||||
vpinsrb xtmph5, len.w, 15
|
||||
vpshufb xtmph5, xmask0f ;Broadcast len to all bytes
|
||||
vpcmpgtb xtmph5, xtmph5, xtmph1
|
||||
|
||||
vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmph5
|
||||
vpxor xd1, xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector
|
||||
XLDR xd4, [dest4+tmp] ;Reuse xtmph1, Get next dest vector
|
||||
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmph5
|
||||
vpxor xd2, xd2, xtmph2
|
||||
|
||||
XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector
|
||||
|
||||
; dest3
|
||||
vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpand xtmph3, xtmph3, xtmph5
|
||||
vpxor xd3, xd3, xtmph3
|
||||
|
||||
; dest4
|
||||
vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpand xgft4_hi, xgft4_hi, xtmph5
|
||||
vpxor xd4, xd4, xgft4_hi
|
||||
|
||||
; dest5
|
||||
vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft5_hi, xgft5_hi, xtmpl5 ;GF add high and low partials
|
||||
vpand xgft5_hi, xgft5_hi, xtmph5
|
||||
vpxor xd5, xd5, xgft5_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result into dest1
|
||||
XSTR [dest2+tmp], xd2 ;Store result into dest2
|
||||
XSTR [dest3+tmp], xd3 ;Store result into dest3
|
||||
XSTR [dest4+tmp], xd4 ;Store result into dest4
|
||||
XSTR [dest5+tmp], xd5 ;Store result into dest5
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_MAD_AVX, 02, 00, 020d
|
393
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_avx2.asm
generated
vendored
393
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_avx2.asm
generated
vendored
|
@ -1,393 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_MAD_AVX2 _gf_5vect_mad_avx2
|
||||
%else
|
||||
%define GF_5VECT_MAD_AVX2 gf_5vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r15, 10*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r15, [rsp + 10*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp2
|
||||
%define dest3 mul_array
|
||||
%define dest4 vec
|
||||
%define dest5 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft2_lo ymm13
|
||||
%define xgft3_lo ymm12
|
||||
%define xgft4_lo ymm11
|
||||
%define xgft5_lo ymm10
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmpl ymm2
|
||||
%define xtmplx xmm2
|
||||
%define xtmph1 ymm3
|
||||
%define xtmph1x xmm3
|
||||
%define xtmph2 ymm4
|
||||
%define xd1 ymm5
|
||||
%define xd2 ymm6
|
||||
%define xd3 ymm7
|
||||
%define xd4 ymm8
|
||||
%define xd5 ymm9
|
||||
|
||||
align 16
|
||||
global GF_5VECT_MAD_AVX2:function
|
||||
func(GF_5VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5 ;Multiply by 32
|
||||
lea tmp, [mul_array + vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
add tmp, vec
|
||||
vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
mov dest3, [dest1+2*PS] ; reuse mul_array
|
||||
mov dest4, [dest1+3*PS] ; reuse vec
|
||||
mov dest5, [dest1+4*PS] ; reuse vec_i
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Get next dest vector
|
||||
XLDR xd5, [dest5+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
; dest3
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph1 ;xd3 += partial
|
||||
|
||||
vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
|
||||
; dest4
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials
|
||||
vpxor xd4, xd4, xtmph2 ;xd4 += partial
|
||||
|
||||
; dest5
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials
|
||||
vpxor xd5, xd5, xtmph1 ;xd5 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1
|
||||
XSTR [dest2+pos], xd2
|
||||
XSTR [dest3+pos], xd3
|
||||
XSTR [dest4+pos], xd4
|
||||
XSTR [dest5+pos], xd5
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan32:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp.b, 0x1f
|
||||
vpinsrb xtmph1x, xtmph1x, tmp.w, 0
|
||||
vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f...
|
||||
|
||||
mov tmp, len ;Overlapped offset length-32
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;Get next dest vector
|
||||
XLDR xd4, [dest4+tmp] ;Get next dest vector
|
||||
XLDR xd5, [dest5+tmp] ;Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmph2, [constip32] ;Load const of i + 32
|
||||
vpinsrb xtmplx, xtmplx, len.w, 15
|
||||
vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx
|
||||
vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f...
|
||||
vpcmpgtb xtmpl, xtmpl, xtmph2
|
||||
|
||||
vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
; dest1
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmpl
|
||||
vpxor xd1, xd1, xtmph1 ;xd1 += partial
|
||||
|
||||
vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
; dest2
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmpl
|
||||
vpxor xd2, xd2, xtmph2 ;xd2 += partial
|
||||
|
||||
vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
; dest3
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xgft3_lo ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmpl
|
||||
vpxor xd3, xd3, xtmph1 ;xd3 += partial
|
||||
|
||||
vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
|
||||
; dest4
|
||||
vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xgft4_lo ;GF add high and low partials
|
||||
vpand xtmph2, xtmph2, xtmpl
|
||||
vpxor xd4, xd4, xtmph2 ;xd4 += partial
|
||||
|
||||
; dest5
|
||||
vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xgft5_lo ;GF add high and low partials
|
||||
vpand xtmph1, xtmph1, xtmpl
|
||||
vpxor xd5, xd5, xtmph1 ;xd5 += partial
|
||||
|
||||
XSTR [dest1+tmp], xd1
|
||||
XSTR [dest2+tmp], xd2
|
||||
XSTR [dest3+tmp], xd3
|
||||
XSTR [dest4+tmp], xd4
|
||||
XSTR [dest5+tmp], xd5
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
align 32
|
||||
constip32:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
ddq 0xe0e1e2e3e4e5e6e7e8e9eaebecedeeef
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_MAD_AVX2, 04, 00, 020e
|
409
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_sse.asm
generated
vendored
409
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_5vect_mad_sse.asm
generated
vendored
|
@ -1,409 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_MAD_SSE _gf_5vect_mad_sse
|
||||
%else
|
||||
%define GF_5VECT_MAD_SSE gf_5vect_mad_sse
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define tmp4 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 5*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp4
|
||||
%define dest3 mul_array
|
||||
%define dest4 tmp2
|
||||
%define dest5 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft5_hi xmm14
|
||||
%define xgft4_lo xmm13
|
||||
%define xgft4_hi xmm12
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xtmph5 xmm8
|
||||
%define xtmpl5 xmm9
|
||||
%define xd1 xmm10
|
||||
%define xd2 xmm11
|
||||
%define xd3 xtmpl1
|
||||
%define xd4 xtmph1
|
||||
%define xd5 xtmpl2
|
||||
|
||||
|
||||
align 16
|
||||
global GF_5VECT_MAD_SSE:function
|
||||
func(GF_5VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov tmp, vec
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
movdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
sal vec, 5 ;Multiply by 32
|
||||
add tmp, vec
|
||||
movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
|
||||
mov dest3, [dest1+2*PS] ; reuse mul_array
|
||||
mov dest4, [dest1+3*PS]
|
||||
mov dest5, [dest1+4*PS] ; reuse vec_i
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqa xtmph5, xgft5_hi ;Reload const array registers
|
||||
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Reuse xtmph1. Get next dest vector
|
||||
|
||||
; dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
XLDR xd5, [dest5+pos] ;Reuse xtmpl2. Get next dest vector
|
||||
|
||||
; dest3
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pxor xd3, xtmph3
|
||||
|
||||
movdqa xtmph2, xgft4_hi ;Reload const array registers
|
||||
movdqa xtmpl3, xgft4_lo ;Reload const array registers
|
||||
|
||||
; dest5
|
||||
pshufb xtmph5, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph5, xtmpl5 ;GF add high and low partials
|
||||
pxor xd5, xtmph5
|
||||
|
||||
; dest4
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl3 ;GF add high and low partials
|
||||
pxor xd4, xtmph2
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result into dest1
|
||||
XSTR [dest2+pos], xd2 ;Store result into dest2
|
||||
XSTR [dest3+pos], xd3 ;Store result into dest3
|
||||
XSTR [dest4+pos], xd4 ;Store result into dest4
|
||||
XSTR [dest5+pos], xd5 ;Store result into dest5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp, len ;Overlapped offset length-16
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
movdqa xtmpl1, [constip16] ;Load const of i + 16
|
||||
pinsrb xtmph5, len.w, 15
|
||||
pshufb xtmph5, xmask0f ;Broadcast len to all bytes
|
||||
pcmpgtb xtmph5, xtmpl1
|
||||
|
||||
movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
; dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pand xtmph1, xtmph5
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector
|
||||
XLDR xd4, [dest4+tmp] ;Reuse xtmph1. Get next dest vector
|
||||
|
||||
; dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pand xtmph2, xtmph5
|
||||
pxor xd2, xtmph2
|
||||
|
||||
XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector
|
||||
|
||||
; dest3
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pand xtmph3, xtmph5
|
||||
pxor xd3, xtmph3
|
||||
|
||||
; dest4
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pand xgft4_hi, xtmph5
|
||||
pxor xd4, xgft4_hi
|
||||
|
||||
; dest5
|
||||
pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft5_hi, xtmpl5 ;GF add high and low partials
|
||||
pand xgft5_hi, xtmph5
|
||||
pxor xd5, xgft5_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result into dest1
|
||||
XSTR [dest2+tmp], xd2 ;Store result into dest2
|
||||
XSTR [dest3+tmp], xd3 ;Store result into dest3
|
||||
XSTR [dest4+tmp], xd4 ;Store result into dest4
|
||||
XSTR [dest5+tmp], xd5 ;Store result into dest5
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_5VECT_MAD_SSE, 00, 00, 020c
|
360
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_avx.asm
generated
vendored
360
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_avx.asm
generated
vendored
|
@ -1,360 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
%define xp6 xmm7
|
||||
|
||||
align 16
|
||||
global GF_6VECT_DOT_PROD_AVX:function
|
||||
func(GF_6VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
vpxor xp6, xp6
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
add tmp, 32
|
||||
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195
|
373
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_avx2.asm
generated
vendored
373
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,373 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
vmovdqa [rsp + 9*16], xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft1_hi ymm13
|
||||
%define xgft2_lo ymm12
|
||||
%define xgft2_hi ymm11
|
||||
%define xgft3_lo ymm10
|
||||
%define xgft3_hi ymm9
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%define xp5 ymm6
|
||||
%define xp6 ymm7
|
||||
|
||||
align 16
|
||||
global GF_6VECT_DOT_PROD_AVX2:function
|
||||
func(GF_6VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop32:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
vpxor xp6, xp6
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add vec_i, PS
|
||||
|
||||
vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
add tmp, 32
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_DOT_PROD_AVX2, 04, 03, 019a
|
360
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_sse.asm
generated
vendored
360
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_dot_prod_sse.asm
generated
vendored
|
@ -1,360 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm12, [rsp + 6*16]
|
||||
movdqa xmm13, [rsp + 7*16]
|
||||
movdqa xmm14, [rsp + 8*16]
|
||||
movdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm2
|
||||
%define xgft1_hi xmm3
|
||||
%define xgft2_lo xmm4
|
||||
%define xgft2_hi xmm5
|
||||
%define xgft3_lo xmm6
|
||||
%define xgft3_hi xmm7
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm8
|
||||
%define xp2 xmm9
|
||||
%define xp3 xmm10
|
||||
%define xp4 xmm11
|
||||
%define xp5 xmm12
|
||||
%define xp6 xmm13
|
||||
|
||||
align 16
|
||||
global GF_6VECT_DOT_PROD_SSE:function
|
||||
func(GF_6VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
pxor xp4, xp4
|
||||
pxor xp5, xp5
|
||||
pxor xp6, xp6
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
movdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
add tmp, 32
|
||||
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_DOT_PROD_SSE, 00, 04, 0066
|
433
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_avx.asm
generated
vendored
433
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_avx.asm
generated
vendored
|
@ -1,433 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_MAD_AVX _gf_6vect_mad_avx
|
||||
%else
|
||||
%define GF_6VECT_MAD_AVX gf_6vect_mad_avx
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define tmp4 r14
|
||||
%define tmp5 rdi
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 5*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define tmp5 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define tmp5 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp4
|
||||
%define dest3 tmp2
|
||||
%define dest4 mul_array
|
||||
%define dest5 tmp5
|
||||
%define dest6 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft4_lo xmm14
|
||||
%define xgft4_hi xmm13
|
||||
%define xgft5_lo xmm12
|
||||
%define xgft5_hi xmm11
|
||||
%define xgft6_lo xmm10
|
||||
%define xgft6_hi xmm9
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xd1 xmm8
|
||||
%define xd2 xtmpl1
|
||||
%define xd3 xtmph1
|
||||
|
||||
|
||||
align 16
|
||||
global GF_6VECT_MAD_AVX:function
|
||||
func(GF_6VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov tmp, vec
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
|
||||
sal vec, 5 ;Multiply by 32
|
||||
lea vec_i, [tmp + vec] ;vec_i = vec*96
|
||||
lea mul_array, [tmp + vec_i] ;mul_array = vec*160
|
||||
|
||||
vmovdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
vmovdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
vmovdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
vmovdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS] ; reuse mul_array
|
||||
mov dest5, [dest1+4*PS]
|
||||
mov dest6, [dest1+5*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
|
||||
;dest1
|
||||
vpshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xtmph1
|
||||
|
||||
XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
;dest2
|
||||
vpshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xtmph2
|
||||
|
||||
;dest3
|
||||
vpshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xtmph3
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result into dest1
|
||||
XSTR [dest2+pos], xd2 ;Store result into dest2
|
||||
XSTR [dest3+pos], xd3 ;Store result into dest3
|
||||
|
||||
;dest4
|
||||
XLDR xd1, [dest4+pos] ;Get next dest vector
|
||||
vpshufb xtmph1, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl1, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph1
|
||||
|
||||
XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest6+pos] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
;dest5
|
||||
vpshufb xtmph2, xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl2, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph2
|
||||
|
||||
;dest6
|
||||
vpshufb xtmph3, xgft6_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl3, xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph3
|
||||
|
||||
XSTR [dest4+pos], xd1 ;Store result into dest4
|
||||
XSTR [dest5+pos], xd2 ;Store result into dest5
|
||||
XSTR [dest6+pos], xd3 ;Store result into dest6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
;; Overlapped offset length-16
|
||||
mov tmp, len ;Backup len as len=rdi
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest4+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vmovdqa xtmph3, [constip16] ;Load const of i + 16
|
||||
vpinsrb xtmpl3, len.w, 15
|
||||
vpshufb xtmpl3, xmask0f ;Broadcast len to all bytes
|
||||
vpcmpgtb xtmpl3, xtmpl3, xtmph3
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
;dest4
|
||||
vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpand xgft4_hi, xgft4_hi, xtmpl3
|
||||
vpxor xd1, xd1, xgft4_hi
|
||||
|
||||
;dest5
|
||||
vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
|
||||
vpand xgft5_hi, xgft5_hi, xtmpl3
|
||||
vpxor xd2, xd2, xgft5_hi
|
||||
|
||||
;dest6
|
||||
vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials
|
||||
vpand xgft6_hi, xgft6_hi, xtmpl3
|
||||
vpxor xd3, xd3, xgft6_hi
|
||||
|
||||
XSTR [dest4+tmp], xd1 ;Store result into dest4
|
||||
XSTR [dest5+tmp], xd2 ;Store result into dest5
|
||||
XSTR [dest6+tmp], xd3 ;Store result into dest6
|
||||
|
||||
vmovdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
vmovdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
vmovdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
vmovdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
vmovdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest3 vector
|
||||
|
||||
;dest1
|
||||
vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpand xgft4_hi, xgft4_hi, xtmpl3
|
||||
vpxor xd1, xd1, xgft4_hi
|
||||
|
||||
;dest2
|
||||
vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials
|
||||
vpand xgft5_hi, xgft5_hi, xtmpl3
|
||||
vpxor xd2, xd2, xgft5_hi
|
||||
|
||||
;dest3
|
||||
vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials
|
||||
vpand xgft6_hi, xgft6_hi, xtmpl3
|
||||
vpxor xd3, xd3, xgft6_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result into dest1
|
||||
XSTR [dest2+tmp], xd2 ;Store result into dest2
|
||||
XSTR [dest3+tmp], xd3 ;Store result into dest3
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_MAD_AVX, 02, 00, 0210
|
435
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_avx2.asm
generated
vendored
435
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_avx2.asm
generated
vendored
|
@ -1,435 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_MAD_AVX2 _gf_6vect_mad_avx2
|
||||
%else
|
||||
%define GF_6VECT_MAD_AVX2 gf_6vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r15, 10*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp2
|
||||
%define dest4 mul_array
|
||||
%define dest5 vec
|
||||
%define dest6 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft2_lo ymm13
|
||||
%define xgft3_lo ymm12
|
||||
%define xgft4_lo ymm11
|
||||
%define xgft5_lo ymm10
|
||||
%define xgft6_lo ymm9
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmpl ymm2
|
||||
%define xtmplx xmm2
|
||||
%define xtmph ymm3
|
||||
%define xtmphx xmm3
|
||||
%define xd1 ymm4
|
||||
%define xd2 ymm5
|
||||
%define xd3 ymm6
|
||||
%define xd4 ymm7
|
||||
%define xd5 ymm8
|
||||
%define xd6 xd1
|
||||
|
||||
align 16
|
||||
global GF_6VECT_MAD_AVX2:function
|
||||
func(GF_6VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
sal vec, 5 ;Multiply by 32
|
||||
lea tmp, [mul_array + vec_i]
|
||||
mov vec_i, vec
|
||||
mov mul_array, vec
|
||||
sal vec_i, 1
|
||||
sal mul_array, 1
|
||||
add vec_i, vec ;vec_i=vec*96
|
||||
add mul_array, vec_i ;vec_i=vec*160
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vec_i] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft6_lo, [tmp+mul_array] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
mov dest2, [dest1+PS] ; reuse tmp3
|
||||
mov dest3, [dest1+2*PS] ; reuse tmp2
|
||||
mov dest4, [dest1+3*PS] ; reuse mul_array
|
||||
mov dest5, [dest1+4*PS] ; reuse vec
|
||||
mov dest6, [dest1+5*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
XLDR xd2, [dest2+pos] ;Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;Get next dest vector
|
||||
XLDR xd4, [dest4+pos] ;Get next dest vector
|
||||
XLDR xd5, [dest5+pos] ;Get next dest vector
|
||||
|
||||
vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
;dest1
|
||||
vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd1, xd1, xtmph ;xd1 += partial
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result into dest1
|
||||
|
||||
;dest2
|
||||
vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd2, xd2, xtmph ;xd2 += partial
|
||||
|
||||
;dest3
|
||||
vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd3, xd3, xtmph ;xd3 += partial
|
||||
|
||||
XLDR xd6, [dest6+pos] ;reuse xd1. Get next dest vector
|
||||
|
||||
;dest4
|
||||
vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd4, xd4, xtmph ;xd4 += partial
|
||||
|
||||
;dest5
|
||||
vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd5, xd5, xtmph ;xd5 += partial
|
||||
|
||||
;dest6
|
||||
vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd6, xd6, xtmph ;xd6 += partial
|
||||
|
||||
XSTR [dest2+pos], xd2 ;Store result into dest2
|
||||
XSTR [dest3+pos], xd3 ;Store result into dest3
|
||||
XSTR [dest4+pos], xd4 ;Store result into dest4
|
||||
XSTR [dest5+pos], xd5 ;Store result into dest5
|
||||
XSTR [dest6+pos], xd6 ;Store result into dest6
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan32:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
mov tmp.b, 0x1f
|
||||
vpinsrb xtmphx, xtmphx, tmp.w, 0
|
||||
vpbroadcastb xtmph, xtmphx ;Construct mask 0x1f1f1f...
|
||||
|
||||
mov tmp, len ;Overlapped offset length-32
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;Get next dest vector
|
||||
XLDR xd4, [dest4+tmp] ;Get next dest vector
|
||||
XLDR xd5, [dest5+tmp] ;Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
vpinsrb xtmplx, xtmplx, len.w, 15
|
||||
vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx
|
||||
vpshufb xtmpl, xtmpl, xtmph ;Broadcast len to all bytes. xtmph=0x1f1f1f...
|
||||
vpcmpgtb xtmpl, xtmpl, [constip32]
|
||||
|
||||
vpand xtmph, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vperm2i128 xtmpa, xtmph, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xtmph, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
;dest1
|
||||
vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft1_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd1, xd1, xtmph ;xd1 += partial
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result into dest1
|
||||
|
||||
;dest2
|
||||
vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft2_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd2, xd2, xtmph ;xd2 += partial
|
||||
|
||||
;dest3
|
||||
vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft3_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd3, xd3, xtmph ;xd3 += partial
|
||||
|
||||
XLDR xd6, [dest6+tmp] ;reuse xd1. Get next dest vector
|
||||
|
||||
;dest4
|
||||
vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft4_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd4, xd4, xtmph ;xd4 += partial
|
||||
|
||||
;dest5
|
||||
vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft5_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd5, xd5, xtmph ;xd5 += partial
|
||||
|
||||
;dest6
|
||||
vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo
|
||||
vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xgft6_lo ;GF add high and low partials
|
||||
vpand xtmph, xtmph, xtmpl
|
||||
vpxor xd6, xd6, xtmph ;xd6 += partial
|
||||
|
||||
XSTR [dest2+tmp], xd2 ;Store result into dest2
|
||||
XSTR [dest3+tmp], xd3 ;Store result into dest3
|
||||
XSTR [dest4+tmp], xd4 ;Store result into dest4
|
||||
XSTR [dest5+tmp], xd5 ;Store result into dest5
|
||||
XSTR [dest6+tmp], xd6 ;Store result into dest6
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
align 32
|
||||
constip32:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
ddq 0xe0e1e2e3e4e5e6e7e8e9eaebecedeeef
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_MAD_AVX2, 04, 00, 0211
|
446
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_sse.asm
generated
vendored
446
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_6vect_mad_sse.asm
generated
vendored
|
@ -1,446 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_MAD_SSE _gf_6vect_mad_sse
|
||||
%else
|
||||
%define GF_6VECT_MAD_SSE gf_6vect_mad_sse
|
||||
%endif
|
||||
|
||||
%define PS 8
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13
|
||||
%define tmp4 r14
|
||||
%define tmp5 rdi
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define stack_size 16*10 + 5*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
movdqa [rsp+16*3],xmm9
|
||||
movdqa [rsp+16*4],xmm10
|
||||
movdqa [rsp+16*5],xmm11
|
||||
movdqa [rsp+16*6],xmm12
|
||||
movdqa [rsp+16*7],xmm13
|
||||
movdqa [rsp+16*8],xmm14
|
||||
movdqa [rsp+16*9],xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
movdqa xmm9, [rsp+16*3]
|
||||
movdqa xmm10, [rsp+16*4]
|
||||
movdqa xmm11, [rsp+16*5]
|
||||
movdqa xmm12, [rsp+16*6]
|
||||
movdqa xmm13, [rsp+16*7]
|
||||
movdqa xmm14, [rsp+16*8]
|
||||
movdqa xmm15, [rsp+16*9]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define tmp5 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp2 r10
|
||||
%define tmp3 r12
|
||||
%define tmp4 r13
|
||||
%define tmp5 r14
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest1 arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%define dest2 mul_array
|
||||
%define dest3 tmp2
|
||||
%define dest4 tmp4
|
||||
%define dest5 tmp5
|
||||
%define dest6 vec_i
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft4_lo xmm14
|
||||
%define xgft4_hi xmm13
|
||||
%define xgft5_lo xmm12
|
||||
%define xgft5_hi xmm11
|
||||
%define xgft6_lo xmm10
|
||||
%define xgft6_hi xmm9
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph1 xmm2
|
||||
%define xtmpl1 xmm3
|
||||
%define xtmph2 xmm4
|
||||
%define xtmpl2 xmm5
|
||||
%define xtmph3 xmm6
|
||||
%define xtmpl3 xmm7
|
||||
%define xd1 xmm8
|
||||
%define xd2 xtmpl1
|
||||
%define xd3 xtmph1
|
||||
|
||||
|
||||
align 16
|
||||
global GF_6VECT_MAD_SSE:function
|
||||
func(GF_6VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
mov tmp, vec
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
lea tmp3, [mul_array + vec_i]
|
||||
sal tmp, 6 ;Multiply by 64
|
||||
|
||||
sal vec, 5 ;Multiply by 32
|
||||
lea vec_i, [tmp + vec] ;vec_i = 96
|
||||
lea mul_array, [tmp + vec_i] ;mul_array = 160
|
||||
|
||||
movdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
movdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ...
|
||||
movdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
|
||||
movdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
movdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS] ; reuse mul_array
|
||||
mov dest5, [dest1+4*PS]
|
||||
mov dest6, [dest1+5*PS] ; reuse vec_i
|
||||
mov dest1, [dest1]
|
||||
|
||||
.loop16:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
XLDR xd1, [dest1+pos] ;Get next dest vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
;dest1
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest3 vector
|
||||
|
||||
;dest2
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
;dest3
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pxor xd3, xtmph3
|
||||
|
||||
XSTR [dest1+pos], xd1 ;Store result into dest1
|
||||
XSTR [dest2+pos], xd2 ;Store result into dest2
|
||||
XSTR [dest3+pos], xd3 ;Store result into dest3
|
||||
|
||||
movdqa xtmph1, xgft4_hi ;Reload const array registers
|
||||
movdqa xtmpl1, xgft4_lo ;Reload const array registers
|
||||
movdqa xtmph2, xgft5_hi ;Reload const array registers
|
||||
movdqa xtmpl2, xgft5_lo ;Reload const array registers
|
||||
movdqa xtmph3, xgft6_hi ;Reload const array registers
|
||||
movdqa xtmpl3, xgft6_lo ;Reload const array registers
|
||||
|
||||
;dest4
|
||||
XLDR xd1, [dest4+pos] ;Get next dest vector
|
||||
pshufb xtmph1, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph1, xtmpl1 ;GF add high and low partials
|
||||
pxor xd1, xtmph1
|
||||
|
||||
XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest6+pos] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
;dest5
|
||||
pshufb xtmph2, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph2, xtmpl2 ;GF add high and low partials
|
||||
pxor xd2, xtmph2
|
||||
|
||||
;dest6
|
||||
pshufb xtmph3, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph3, xtmpl3 ;GF add high and low partials
|
||||
pxor xd3, xtmph3
|
||||
|
||||
XSTR [dest4+pos], xd1 ;Store result into dest4
|
||||
XSTR [dest5+pos], xd2 ;Store result into dest5
|
||||
XSTR [dest6+pos], xd3 ;Store result into dest6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
.lessthan16:
|
||||
;; Tail len
|
||||
;; Do one more overlap pass
|
||||
;; Overlapped offset length-16
|
||||
mov tmp, len ;Backup len as len=rdi
|
||||
|
||||
XLDR x0, [src+tmp] ;Get next source vector
|
||||
XLDR xd1, [dest4+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector
|
||||
|
||||
sub len, pos
|
||||
|
||||
movdqa xtmph3, [constip16] ;Load const of i + 16
|
||||
pinsrb xtmpl3, len.w, 15
|
||||
pshufb xtmpl3, xmask0f ;Broadcast len to all bytes
|
||||
pcmpgtb xtmpl3, xtmph3
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
;dest4
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pand xgft4_hi, xtmpl3
|
||||
pxor xd1, xgft4_hi
|
||||
|
||||
;dest5
|
||||
pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft5_hi, xgft5_lo ;GF add high and low partials
|
||||
pand xgft5_hi, xtmpl3
|
||||
pxor xd2, xgft5_hi
|
||||
|
||||
;dest6
|
||||
pshufb xgft6_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft6_hi, xgft6_lo ;GF add high and low partials
|
||||
pand xgft6_hi, xtmpl3
|
||||
pxor xd3, xgft6_hi
|
||||
|
||||
XSTR [dest4+tmp], xd1 ;Store result into dest4
|
||||
XSTR [dest5+tmp], xd2 ;Store result into dest5
|
||||
XSTR [dest6+tmp], xd3 ;Store result into dest6
|
||||
|
||||
movdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ...
|
||||
movdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
|
||||
movdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ...
|
||||
movdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
|
||||
movdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
XLDR xd1, [dest1+tmp] ;Get next dest vector
|
||||
XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector
|
||||
XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest3 vector
|
||||
|
||||
;dest1
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pand xgft4_hi, xtmpl3
|
||||
pxor xd1, xgft4_hi
|
||||
|
||||
;dest2
|
||||
pshufb xgft5_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft5_hi, xgft5_lo ;GF add high and low partials
|
||||
pand xgft5_hi, xtmpl3
|
||||
pxor xd2, xgft5_hi
|
||||
|
||||
;dest3
|
||||
pshufb xgft6_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft6_hi, xgft6_lo ;GF add high and low partials
|
||||
pand xgft6_hi, xtmpl3
|
||||
pxor xd3, xgft6_hi
|
||||
|
||||
XSTR [dest1+tmp], xd1 ;Store result into dest1
|
||||
XSTR [dest2+tmp], xd2 ;Store result into dest2
|
||||
XSTR [dest3+tmp], xd3 ;Store result into dest3
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
constip16:
|
||||
ddq 0xf0f1f2f3f4f5f6f7f8f9fafbfcfdfeff
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_6VECT_MAD_SSE, 00, 00, 020f
|
303
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_avx.asm
generated
vendored
303
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_avx.asm
generated
vendored
|
@ -1,303 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 rdi ; must be saved and loaded
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define frame_size 2*8
|
||||
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
rex_push_reg r12
|
||||
push_reg rdi
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop rdi
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
|
||||
%define trans ecx ;trans is for the variables in stack
|
||||
%define arg0 trans
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 trans
|
||||
%define arg1_m arg(1)
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 ebx
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 esi
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg3, arg(3)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define vec_m arg1_m
|
||||
%define len_m arg0_m
|
||||
%define dest_m arg4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ; 64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm5
|
||||
%define xgft_lo xmm4
|
||||
%define xgft_hi xmm3
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp xmm2
|
||||
|
||||
align 16
|
||||
global GF_VECT_DOT_PROD_AVX:function
|
||||
func(GF_VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
.loop16:
|
||||
vpxor xp, xp
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
|
||||
mov ptr, [src+vec_i*PS]
|
||||
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
add tmp, 32
|
||||
add vec_i, 1
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
|
||||
vpxor xp, xp, xgft_hi ;xp += partial
|
||||
|
||||
SLDR vec, vec_m
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest, dest_m
|
||||
XSTR [dest+pos], xp
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
SLDR len, len_m
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_DOT_PROD_AVX, 02, 04, 0061
|
315
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_avx2.asm
generated
vendored
315
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_avx2.asm
generated
vendored
|
@ -1,315 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 rdi ; must be saved and loaded
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define frame_size 2*8
|
||||
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
rex_push_reg r12
|
||||
push_reg rdi
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop rdi
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
|
||||
%define trans ecx ;trans is for the variables in stack
|
||||
%define arg0 trans
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 trans
|
||||
%define arg1_m arg(1)
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 ebx
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp.w edx
|
||||
%define tmp.b dl
|
||||
%define tmp2 edi
|
||||
%define tmp3 esi
|
||||
%define return eax
|
||||
%macro SLDR 2 ;stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg3, arg(3)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define vec_m arg1_m
|
||||
%define len_m arg0_m
|
||||
%define dest_m arg4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm3
|
||||
%define xmask0fx xmm3
|
||||
%define xgft_lo ymm4
|
||||
%define xgft_hi ymm5
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp ymm2
|
||||
|
||||
align 16
|
||||
global GF_VECT_DOT_PROD_AVX2:function
|
||||
func(GF_VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 32
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
.loop32:
|
||||
vpxor xp, xp
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
|
||||
mov ptr, [src+vec_i*PS]
|
||||
|
||||
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
add tmp, 32
|
||||
add vec_i, 1
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
|
||||
vpxor xp, xp, xgft_hi ;xp += partial
|
||||
|
||||
SLDR vec, vec_m
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest, dest_m
|
||||
XSTR [dest+pos], xp
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
SLDR len, len_m
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_DOT_PROD_AVX2, 04, 04, 0190
|
303
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_sse.asm
generated
vendored
303
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_dot_prod_sse.asm
generated
vendored
|
@ -1,303 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 rdi ; must be saved and loaded
|
||||
%define return rax
|
||||
%macro SLDR 2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
%define PS 8
|
||||
%define frame_size 2*8
|
||||
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
rex_push_reg r12
|
||||
push_reg rdi
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop rdi
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
;;;================== High Address;
|
||||
;;; arg4
|
||||
;;; arg3
|
||||
;;; arg2
|
||||
;;; arg1
|
||||
;;; arg0
|
||||
;;; return
|
||||
;;;<================= esp of caller
|
||||
;;; ebp
|
||||
;;;<================= ebp = esp
|
||||
;;; esi
|
||||
;;; edi
|
||||
;;; ebx
|
||||
;;;<================= esp of callee
|
||||
;;;
|
||||
;;;================== Low Address;
|
||||
|
||||
%define PS 4
|
||||
%define LOG_PS 2
|
||||
%define func(x) x:
|
||||
%define arg(x) [ebp + PS*2 + PS*x]
|
||||
|
||||
%define trans ecx ;trans is for the variables in stack
|
||||
%define arg0 trans
|
||||
%define arg0_m arg(0)
|
||||
%define arg1 trans
|
||||
%define arg1_m arg(1)
|
||||
%define arg2 arg2_m
|
||||
%define arg2_m arg(2)
|
||||
%define arg3 ebx
|
||||
%define arg4 trans
|
||||
%define arg4_m arg(4)
|
||||
%define tmp edx
|
||||
%define tmp2 edi
|
||||
%define tmp3 esi
|
||||
%define return eax
|
||||
%macro SLDR 2 ;; stack load/restore
|
||||
mov %1, %2
|
||||
%endmacro
|
||||
%define SSTR SLDR
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
push esi
|
||||
push edi
|
||||
push ebx
|
||||
mov arg3, arg(3)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
mov esp, ebp
|
||||
pop ebp
|
||||
%endmacro
|
||||
|
||||
%endif ; output formats
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define pos return
|
||||
|
||||
%ifidn PS,4 ;32-bit code
|
||||
%define vec_m arg1_m
|
||||
%define len_m arg0_m
|
||||
%define dest_m arg4_m
|
||||
%endif
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn PS,8 ;64-bit code
|
||||
default rel
|
||||
[bits 64]
|
||||
%endif
|
||||
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm5
|
||||
%define xgft_lo xmm4
|
||||
%define xgft_hi xmm3
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp xmm2
|
||||
|
||||
align 16
|
||||
global GF_VECT_DOT_PROD_SSE:function
|
||||
func(GF_VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
SLDR len, len_m
|
||||
sub len, 16
|
||||
SSTR len_m, len
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
.loop16:
|
||||
pxor xp, xp
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
.next_vect:
|
||||
|
||||
mov ptr, [src+vec_i*PS]
|
||||
movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
add tmp, 32
|
||||
add vec_i, 1
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
|
||||
pshufb xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft_hi, xgft_lo ;GF add high and low partials
|
||||
pxor xp, xgft_hi ;xp += partial
|
||||
|
||||
SLDR vec, vec_m
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
SLDR dest, dest_m
|
||||
XSTR [dest+pos], xp
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
SLDR len, len_m
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_DOT_PROD_SSE, 00, 04, 0060
|
223
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_avx.asm
generated
vendored
223
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_avx.asm
generated
vendored
|
@ -1,223 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MAD_AVX _gf_vect_mad_avx
|
||||
%else
|
||||
%define GF_VECT_MAD_AVX gf_vect_mad_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define PS 8
|
||||
%define stack_size 16*3 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
vmovdqa [rsp+16*0],xmm6
|
||||
vmovdqa [rsp+16*1],xmm7
|
||||
vmovdqa [rsp+16*2],xmm8
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r15, 3*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp+16*0]
|
||||
vmovdqa xmm7, [rsp+16*1]
|
||||
vmovdqa xmm8, [rsp+16*2]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r15, [rsp + 3*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm8
|
||||
%define xgft_lo xmm7
|
||||
%define xgft_hi xmm6
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph xmm2
|
||||
%define xtmpl xmm3
|
||||
%define xd xmm4
|
||||
%define xtmpd xmm5
|
||||
|
||||
align 16
|
||||
global GF_VECT_MAD_AVX:function
|
||||
func(GF_VECT_MAD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
|
||||
XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest
|
||||
|
||||
.loop16:
|
||||
XLDR xd, [dest+pos] ;Get next dest vector
|
||||
.loop16_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd, xd, xtmph ;xd += partial
|
||||
|
||||
XSTR [dest+pos], xd
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
vmovdqa xd, xtmpd ;Restore xd
|
||||
jmp .loop16_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_MAD_AVX, 02, 00, 0201
|
233
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_avx2.asm
generated
vendored
233
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_avx2.asm
generated
vendored
|
@ -1,233 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MAD_AVX2 _gf_vect_mad_avx2
|
||||
%else
|
||||
%define GF_VECT_MAD_AVX2 gf_vect_mad_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define arg5 r15
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define PS 8
|
||||
%define stack_size 16*3 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
vmovdqa [rsp+16*0],xmm6
|
||||
vmovdqa [rsp+16*1],xmm7
|
||||
vmovdqa [rsp+16*2],xmm8
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r15, 3*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp+16*0]
|
||||
vmovdqa xmm7, [rsp+16*1]
|
||||
vmovdqa xmm8, [rsp+16*2]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r15, [rsp + 3*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
|
||||
;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm8
|
||||
%define xmask0fx xmm8
|
||||
%define xgft_lo ymm7
|
||||
%define xgft_hi ymm6
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xtmph ymm2
|
||||
%define xtmpl ymm3
|
||||
%define xd ymm4
|
||||
%define xtmpd ymm5
|
||||
|
||||
align 16
|
||||
global GF_VECT_MAD_AVX2:function
|
||||
func(GF_VECT_MAD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
XLDR xtmpd, [dest+len] ;backup the last 32 bytes in dest
|
||||
|
||||
.loop32:
|
||||
XLDR xd, [dest+pos] ;Get next dest vector
|
||||
.loop32_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
|
||||
vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xtmph, xtmph, xtmpl ;GF add high and low partials
|
||||
vpxor xd, xd, xtmph ;xd += partial
|
||||
|
||||
XSTR [dest+pos], xd
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
vmovdqa xd, xtmpd ;Restore xd
|
||||
jmp .loop32_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_MAD_AVX2, 04, 00, 0202
|
224
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_sse.asm
generated
vendored
224
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mad_sse.asm
generated
vendored
|
@ -1,224 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest);
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MAD_SSE _gf_vect_mad_sse
|
||||
%else
|
||||
%define GF_VECT_MAD_SSE gf_vect_mad_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg0.w ecx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define arg4 r12
|
||||
%define arg5 r15
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
%define PS 8
|
||||
%define stack_size 16*3 + 3*8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
%define func(x) proc_frame x
|
||||
|
||||
%macro FUNC_SAVE 0
|
||||
sub rsp, stack_size
|
||||
movdqa [rsp+16*0],xmm6
|
||||
movdqa [rsp+16*1],xmm7
|
||||
movdqa [rsp+16*2],xmm8
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r15, 3*16 + 1*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
mov arg5, arg(5)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp+16*0]
|
||||
movdqa xmm7, [rsp+16*1]
|
||||
movdqa xmm8, [rsp+16*2]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r15, [rsp + 3*16 + 1*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg0.w edi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define return.w eax
|
||||
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest)
|
||||
%define len arg0
|
||||
%define len.w arg0.w
|
||||
%define vec arg1
|
||||
%define vec_i arg2
|
||||
%define mul_array arg3
|
||||
%define src arg4
|
||||
%define dest arg5
|
||||
%define pos return
|
||||
%define pos.w return.w
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm8
|
||||
%define xgft_lo xmm7
|
||||
%define xgft_hi xmm6
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xtmph xmm2
|
||||
%define xtmpl xmm3
|
||||
%define xd xmm4
|
||||
%define xtmpd xmm5
|
||||
|
||||
|
||||
align 16
|
||||
global GF_VECT_MAD_SSE:function
|
||||
func(GF_VECT_MAD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec_i, 5 ;Multiply by 32
|
||||
movdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
|
||||
XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest
|
||||
|
||||
.loop16:
|
||||
XLDR xd, [dest+pos] ;Get next dest vector
|
||||
.loop16_overlap:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
movdqa xtmph, xgft_hi ;Reload const array registers
|
||||
movdqa xtmpl, xgft_lo
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
|
||||
pshufb xtmph, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmpl, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xtmph, xtmpl ;GF add high and low partials
|
||||
|
||||
pxor xd, xtmph
|
||||
XSTR [dest+pos], xd ;Store result
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
movdqa xd, xtmpd ;Restore xd
|
||||
jmp .loop16_overlap ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_MAD_SSE, 00, 00, 0200
|
|
@ -1,148 +0,0 @@
|
|||
/**********************************************************************
|
||||
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef _GF_VECT_MUL_H
|
||||
#define _GF_VECT_MUL_H
|
||||
|
||||
/**
|
||||
* @file gf_vect_mul.h
|
||||
* @brief Interface to functions for vector (block) multiplication in GF(2^8).
|
||||
*
|
||||
* This file defines the interface to routines used in fast RAID rebuild and
|
||||
* erasure codes.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
* @requires SSE4.1
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
* @requires AVX
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant, runs appropriate version.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
|
||||
* Len and src must be aligned to 32B.
|
||||
*
|
||||
* This function determines what instruction sets are enabled
|
||||
* and selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Initialize 32-byte constant array for GF(2^8) vector multiply
|
||||
*
|
||||
* Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
|
||||
* C{20}, ... , C{f0} } as required by other fast vector multiply
|
||||
* functions.
|
||||
* @param c Constant input.
|
||||
* @param gftbl Table output.
|
||||
*/
|
||||
|
||||
void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant, runs baseline version.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param a Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* only use 2nd element is used.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
*/
|
||||
|
||||
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_GF_VECT_MUL_H
|
189
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mul_avx.asm
generated
vendored
189
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mul_avx.asm
generated
vendored
|
@ -1,189 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mul_avx(len, mul_array, src, dest)
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
|
||||
%else
|
||||
%define GF_VECT_MUL_AVX gf_vect_mul_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm13, 2*16
|
||||
save_xmm128 xmm14, 3*16
|
||||
save_xmm128 xmm15, 4*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm13, [rsp + 2*16]
|
||||
vmovdqa xmm14, [rsp + 3*16]
|
||||
vmovdqa xmm15, [rsp + 4*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
%define len arg0
|
||||
%define mul_array arg1
|
||||
%define src arg2
|
||||
%define dest arg3
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft_lo xmm14
|
||||
%define xgft_hi xmm13
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmp1a xmm1
|
||||
%define xtmp1b xmm2
|
||||
%define xtmp1c xmm3
|
||||
%define x1 xmm4
|
||||
%define xtmp2a xmm5
|
||||
%define xtmp2b xmm6
|
||||
%define xtmp2c xmm7
|
||||
|
||||
align 16
|
||||
global GF_VECT_MUL_AVX:function
|
||||
func(GF_VECT_MUL_AVX)
|
||||
FUNC_SAVE
|
||||
mov pos, 0
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
vmovdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
|
||||
loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
|
||||
add pos, 32 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
vpand xtmp1a, x0, xmask0f ;Mask low src nibble in bits 4-0
|
||||
vpand xtmp2a, x1, xmask0f
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
vpsraw x1, x1, 4
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
vpand x1, x1, xmask0f
|
||||
vpshufb xtmp1b, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xtmp1c, xgft_lo, xtmp1a ;Lookup mul table of low nibble
|
||||
vpshufb xtmp2b, xgft_hi, x1 ;Lookup mul table of high nibble
|
||||
vpshufb xtmp2c, xgft_lo, xtmp2a ;Lookup mul table of low nibble
|
||||
vpxor xtmp1b, xtmp1b, xtmp1c ;GF add high and low partials
|
||||
vpxor xtmp2b, xtmp2b, xtmp2c
|
||||
XSTR [dest+pos-32], xtmp1b ;Store result
|
||||
XSTR [dest+pos-16], xtmp2b ;Store +16B result
|
||||
jl loop32
|
||||
|
||||
|
||||
return_pass:
|
||||
FUNC_RESTORE
|
||||
sub pos, len
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_MUL_AVX, 01, 02, 0036
|
195
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mul_sse.asm
generated
vendored
195
pkg/api/Godeps/_workspace/src/github.com/minio-io/erasure/gf_vect_mul_sse.asm
generated
vendored
|
@ -1,195 +0,0 @@
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mul_sse(len, mul_array, src, dest)
|
||||
;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
|
||||
%else
|
||||
%define GF_VECT_MUL_SSE gf_vect_mul_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm13, 2*16
|
||||
save_xmm128 xmm14, 3*16
|
||||
save_xmm128 xmm15, 4*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm13, [rsp + 2*16]
|
||||
movdqa xmm14, [rsp + 3*16]
|
||||
movdqa xmm15, [rsp + 4*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
%define len arg0
|
||||
%define mul_array arg1
|
||||
%define src arg2
|
||||
%define dest arg3
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Use Non-temporal load/stor
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft_lo xmm14
|
||||
%define xgft_hi xmm13
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmp1a xmm1
|
||||
%define xtmp1b xmm2
|
||||
%define xtmp1c xmm3
|
||||
%define x1 xmm4
|
||||
%define xtmp2a xmm5
|
||||
%define xtmp2b xmm6
|
||||
%define xtmp2c xmm7
|
||||
|
||||
|
||||
align 16
|
||||
global GF_VECT_MUL_SSE:function
|
||||
func(GF_VECT_MUL_SSE)
|
||||
FUNC_SAVE
|
||||
mov pos, 0
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
|
||||
loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
|
||||
movdqa xtmp1b, xgft_hi ;Reload const array registers
|
||||
movdqa xtmp1c, xgft_lo
|
||||
movdqa xtmp2b, xgft_hi
|
||||
movdqa xtmp2c, xgft_lo
|
||||
movdqa xtmp1a, x0 ;Keep unshifted copy of src
|
||||
movdqa xtmp2a, x1
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 4-0
|
||||
psraw x1, 4
|
||||
pand xtmp1a, xmask0f ;Mask low src nibble in bits 4-0
|
||||
pand xtmp2a, xmask0f
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 4-0
|
||||
pand x1, xmask0f
|
||||
pshufb xtmp1b, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmp1c, xtmp1a ;Lookup mul table of low nibble
|
||||
pshufb xtmp2b, x1
|
||||
pshufb xtmp2c, xtmp2a
|
||||
pxor xtmp1b, xtmp1c ;GF add high and low partials
|
||||
pxor xtmp2b, xtmp2c
|
||||
XSTR [dest+pos], xtmp1b ;Store result
|
||||
XSTR [dest+pos+16], xtmp2b ;Store +16B result
|
||||
add pos, 32 ;Loop on 32 bytes at at time
|
||||
cmp pos, len
|
||||
jl loop32
|
||||
|
||||
|
||||
return_pass:
|
||||
sub pos, len
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion GF_VECT_MUL_SSE, 00, 02, 0034
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
//
|
||||
// int sizeInt()
|
||||
// {
|
||||
// return sizeof(int);
|
||||
// }
|
||||
import "C"
|
||||
import "unsafe"
|
||||
|
||||
var (
|
||||
// See http://golang.org/ref/spec#Numeric_types
|
||||
sizeInt = int(C.sizeInt())
|
||||
// SizeInt8 is the byte size of a int8.
|
||||
sizeInt8 = int(unsafe.Sizeof(int8(0)))
|
||||
// SizeInt16 is the byte size of a int16.
|
||||
sizeInt16 = int(unsafe.Sizeof(int16(0)))
|
||||
// SizeInt32 is the byte size of a int32.
|
||||
sizeInt32 = int(unsafe.Sizeof(int32(0)))
|
||||
// SizeInt64 is the byte size of a int64.
|
||||
sizeInt64 = int(unsafe.Sizeof(int64(0)))
|
||||
)
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Minimalist Object Storage, (C) 2014 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package erasure
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
. "gopkg.in/check.v1"
|
||||
)
|
||||
|
||||
func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte {
|
||||
for _, err := range errorIndex {
|
||||
chunks[err] = nil
|
||||
}
|
||||
return chunks
|
||||
}
|
||||
|
||||
func (s *MySuite) TestVanderMondeEncodeDecodeFailure(c *C) {
|
||||
ep, _ := ValidateParams(k, m, Vandermonde)
|
||||
|
||||
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
|
||||
|
||||
e := NewErasure(ep)
|
||||
chunks, err := e.Encode(data)
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
errorIndex := []int{0, 3, 5, 9, 11, 13}
|
||||
chunks = corruptChunks(chunks, errorIndex)
|
||||
|
||||
_, err = e.Decode(chunks, len(data))
|
||||
c.Assert(err, Not(IsNil))
|
||||
}
|
||||
|
||||
func (s *MySuite) TestVanderMondeEncodeDecodeSuccess(c *C) {
|
||||
ep, _ := ValidateParams(k, m, Vandermonde)
|
||||
|
||||
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
|
||||
|
||||
e := NewErasure(ep)
|
||||
chunks, err := e.Encode(data)
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
errorIndex := []int{0, 3, 5, 9, 13}
|
||||
chunks = corruptChunks(chunks, errorIndex)
|
||||
|
||||
recoveredData, err := e.Decode(chunks, len(data))
|
||||
c.Assert(err, IsNil)
|
||||
|
||||
if !bytes.Equal(recoveredData, data) {
|
||||
c.Fatalf("Recovered data mismatches with original data")
|
||||
}
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -1,202 +0,0 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
# iodine
|
||||
Iodine is an error logging framework for tracing errors.
|
|
@ -1,201 +0,0 @@
|
|||
/*
|
||||
* Iodine, (C) 2015 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package iodine
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// WrappedError is the iodine error which contains a pointer to the original error
|
||||
// and stack traces.
|
||||
type Error struct {
|
||||
EmbeddedError error `json:"-"`
|
||||
ErrorMessage string
|
||||
ErrorType string
|
||||
|
||||
Stack []StackEntry
|
||||
}
|
||||
|
||||
// StackEntry contains the entry in the stack trace
|
||||
type StackEntry struct {
|
||||
Host string
|
||||
File string
|
||||
Line int
|
||||
Data map[string]string
|
||||
}
|
||||
|
||||
var gopath string
|
||||
|
||||
var globalState = struct {
|
||||
sync.RWMutex
|
||||
m map[string]string
|
||||
}{m: make(map[string]string)}
|
||||
|
||||
// SetGlobalState - set global state
|
||||
func SetGlobalState(key, value string) {
|
||||
globalState.Lock()
|
||||
globalState.m[key] = value
|
||||
globalState.Unlock()
|
||||
}
|
||||
|
||||
// ClearGlobalState - clear info in globalState struct
|
||||
func ClearGlobalState() {
|
||||
globalState.Lock()
|
||||
for k := range globalState.m {
|
||||
delete(globalState.m, k)
|
||||
}
|
||||
globalState.Unlock()
|
||||
}
|
||||
|
||||
// GetGlobalState - get map from globalState struct
|
||||
func GetGlobalState() map[string]string {
|
||||
result := make(map[string]string)
|
||||
globalState.RLock()
|
||||
for k, v := range globalState.m {
|
||||
result[k] = v
|
||||
}
|
||||
globalState.RUnlock()
|
||||
return result
|
||||
}
|
||||
|
||||
// GetGlobalStateKey - get value for key from globalState struct
|
||||
func GetGlobalStateKey(k string) string {
|
||||
result, ok := globalState.m[k]
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// New instantiates an error, turning it into an iodine Error if it is
// not one already, and appends a stack trace entry annotated with data.
// Returns nil when err is nil, so call sites can wrap unconditionally.
func New(err error, data map[string]string) error {
	if err != nil {
		entry := createStackEntry()
		var newErr Error

		// check if error is already wrapped; if so, reuse it so the
		// stack keeps growing instead of starting over
		switch typedError := err.(type) {
		case Error:
			{
				newErr = typedError
			}
		default:
			{
				newErr = Error{
					EmbeddedError: err,
					ErrorMessage:  err.Error(),
					ErrorType:     reflect.TypeOf(err).String(),
					Stack:         []StackEntry{},
				}
			}
		}
		// caller-supplied data overrides global/system values in this entry
		for k, v := range data {
			entry.Data[k] = v
		}
		newErr.Stack = append(newErr.Stack, entry)
		return newErr
	}
	return nil
}
|
||||
|
||||
// createStackEntry builds a StackEntry for the code that called New:
// hostname, trimmed file/line of the frame two levels up, and a data
// map seeded with the global state plus system metrics.
func createStackEntry() StackEntry {
	host, _ := os.Hostname() // best effort; empty host on failure
	// Caller(2) skips this function and New, landing on New's caller.
	// NOTE(review): this depth is fixed — do not wrap this call in a helper.
	_, file, line, _ := runtime.Caller(2)
	file = strings.TrimPrefix(file, gopath) // trim gopath from file

	// GetGlobalState returns a private copy, safe to mutate below
	data := GetGlobalState()
	for k, v := range getSystemData() {
		data[k] = v
	}

	entry := StackEntry{
		Host: host,
		File: file,
		Line: line,
		Data: data,
	}
	return entry
}
|
||||
|
||||
// getSystemData reports the host, OS, architecture, Go runtime version,
// CPU count, and memory statistics as a string map; these values are
// merged into every stack entry's data.
func getSystemData() map[string]string {
	hostname, hostErr := os.Hostname()
	if hostErr != nil {
		hostname = "" // best effort: record an empty host on failure
	}
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	return map[string]string{
		"sys.host":               hostname,
		"sys.os":                 runtime.GOOS,
		"sys.arch":               runtime.GOARCH,
		"sys.go":                 runtime.Version(),
		"sys.cpus":               strconv.Itoa(runtime.NumCPU()),
		"sys.mem.used":           strconv.FormatUint(stats.Alloc, 10),
		"sys.mem.allocated":      strconv.FormatUint(stats.TotalAlloc, 10),
		"sys.mem.heap.used":      strconv.FormatUint(stats.HeapAlloc, 10),
		"sys.mem.heap.allocated": strconv.FormatUint(stats.HeapSys, 10),
	}
}
|
||||
|
||||
// Annotate an error with a stack entry and returns itself
|
||||
//func (err *WrappedError) Annotate(info map[string]string) *WrappedError {
|
||||
// entry := createStackEntry()
|
||||
// for k, v := range info {
|
||||
// entry.Data[k] = v
|
||||
// }
|
||||
// err.Stack = append(err.Stack, entry)
|
||||
// return err
|
||||
//}
|
||||
|
||||
// EmitJSON writes JSON output for the error
|
||||
func (err Error) EmitJSON() ([]byte, error) {
|
||||
return json.Marshal(err)
|
||||
}
|
||||
|
||||
// EmitHumanReadable returns a human readable error message
|
||||
func (err Error) EmitHumanReadable() string {
|
||||
var errorBuffer bytes.Buffer
|
||||
fmt.Fprintln(&errorBuffer, err.ErrorMessage)
|
||||
for i, entry := range err.Stack {
|
||||
fmt.Fprintln(&errorBuffer, "-", i, entry.Host+":"+entry.File+":"+strconv.Itoa(entry.Line), entry.Data)
|
||||
}
|
||||
return string(errorBuffer.Bytes())
|
||||
}
|
||||
|
||||
// Error implements the error interface. Note it returns the full human
// readable trace (message plus stack lines), not just the original
// error message.
func (err Error) Error() string {
	return err.EmitHumanReadable()
}
|
||||
|
||||
// init computes gopath — the source-path prefix up to and including the
// "src" segment — by walking three directories up from this file's
// compile-time path (…/src/github.com/minio-io/iodine/iodine.go).
// Stack entries trim this prefix so files are shown relative to their
// import path. NOTE(review): assumes this exact directory depth.
func init() {
	_, iodineFile, _, _ := runtime.Caller(0)
	iodineFile = path.Dir(iodineFile) // trim iodine.go
	iodineFile = path.Dir(iodineFile) // trim iodine
	iodineFile = path.Dir(iodineFile) // trim minio-io
	gopath = path.Dir(iodineFile) + "/" // trim github.com
}
|
|
@ -1,97 +0,0 @@
|
|||
/*
|
||||
* Iodine, (C) 2015 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package iodine
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestIodine verifies that wrapping an error four times yields an Error
// with four stack entries and that the result marshals to JSON.
func TestIodine(t *testing.T) {
	iodineError := New(errors.New("Hello"), nil)
	iodineError = New(iodineError, nil)
	iodineError = New(iodineError, nil)
	iodineError = New(iodineError, nil)
	switch typedError := iodineError.(type) {
	case Error:
		{
			// one stack entry per New call
			if len(typedError.Stack) != 4 {
				t.Fail()
			}
			// the wrapped error must be JSON-serializable
			_, err := json.MarshalIndent(typedError, "", " ")
			if err != nil {
				t.Fail()
			}
		}
	default:
		{
			t.Fail()
		}
	}
}
|
||||
|
||||
// TestState verifies the global-state API (set/get/clear) and that
// global state is captured into stack entries when errors are wrapped:
// earlier entries keep their data, and new entries see both global
// state and caller-supplied annotations.
func TestState(t *testing.T) {
	SetGlobalState("hello", "world")
	result := GetGlobalStateKey("hello")
	if result != "world" {
		t.Error("global state not set: hello->world")
		t.Fail()
	}
	ClearGlobalState()
	if len(GetGlobalState()) != 0 {
		t.Fail()
	}
	SetGlobalState("foo", "bar")
	err := New(errors.New("a simple error"), nil)
	switch typedError := err.(type) {
	case Error:
		{
			// first wrap captures current global state
			if res, ok := typedError.Stack[0].Data["foo"]; ok {
				if res != "bar" {
					t.Error("global state not set: foo->bar")
				}
			} else {
				t.Fail()
			}
			// second wrap adds a new entry with extra annotations
			typedError = New(typedError, map[string]string{"foo2": "bar2"}).(Error)
			if res, ok := typedError.Stack[0].Data["foo"]; ok {
				if res != "bar" {
					t.Error("annotate should not modify previous data entries")
				}
			} else {
				t.Error("annotate should not remove previous data entries")
			}
			if res, ok := typedError.Stack[1].Data["foo"]; ok {
				if res != "bar" {
					t.Error("global state should set value properly in annotate")
				}
			} else {
				t.Error("global state should set key properly in annotate")
			}
			if res, ok := typedError.Stack[1].Data["foo2"]; ok {
				if res != "bar2" {
					t.Error("foo2 -> bar should be set")
				}
			} else {
				t.Error("foo2 should be set")
			}
		}
	}
}
|
|
@ -1,22 +0,0 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
|
@ -1,23 +0,0 @@
|
|||
objx - by Mat Ryer and Tyler Bunnell
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Stretchr, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -1,3 +0,0 @@
|
|||
# objx
|
||||
|
||||
* Jump into the [API Documentation](http://godoc.org/github.com/stretchr/objx)
|
|
@ -1,179 +0,0 @@
|
|||
package objx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// arrayAccesRegexString is the regex used to extract the array index
// from an access-path segment such as "books[1]": group 1 is the key,
// group 2 the numeric index.
// NOTE(review): "Acces" is a typo, but the identifier is kept — other
// files in this package may reference it.
const arrayAccesRegexString = `^(.+)\[([0-9]+)\]$`

// arrayAccesRegex is the compiled arrayAccesRegexString, built once at
// package load (MustCompile panics on an invalid pattern).
var arrayAccesRegex = regexp.MustCompile(arrayAccesRegexString)
|
||||
|
||||
// Get gets the value using the specified selector and
// returns it inside a new Value object.
//
// If it cannot find the value, Get will return a nil
// value inside an instance of Value (access is called with
// panics=false, so missing paths never panic here).
//
// Get can only operate directly on map[string]interface{} and []interface.
//
// Example
//
// To access the title of the third chapter of the second book, do:
//
//    o.Get("books[1].chapters[2].title")
func (m Map) Get(selector string) *Value {
	rawObj := access(m, selector, nil, false, false)
	return &Value{data: rawObj}
}
|
||||
|
||||
// Set sets the value using the specified selector and
// returns the object on which Set was called, allowing chaining.
//
// Set can only operate directly on map[string]interface{} and []interface
// (access is called with panics=false, so an invalid path is silently a
// no-op rather than a panic).
//
// Example
//
// To set the title of the third chapter of the second book, do:
//
//    o.Set("books[1].chapters[2].title","Time to Go")
func (m Map) Set(selector string, value interface{}) Map {
	access(m, selector, value, true, false)
	return m
}
|
||||
|
||||
// access walks current using selector and performs the appropriate
// action: read (isSet=false) or write (isSet=true). Integer selectors
// index directly into a slice; string selectors are dot-separated paths
// whose segments may carry an array suffix like "key[2]". When panics
// is true, invalid paths and out-of-range indexes panic; otherwise they
// yield nil (read) or drop the write.
func access(current, selector, value interface{}, isSet, panics bool) interface{} {

	switch selector.(type) {
	case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:

		// integer selector: direct slice index, never recurses
		if array, ok := current.([]interface{}); ok {
			index := intFromInterface(selector)

			if index >= len(array) {
				if panics {
					panic(fmt.Sprintf("objx: Index %d is out of range. Slice only contains %d items.", index, len(array)))
				}
				return nil
			}

			return array[index]
		}

		return nil

	case string:

		// split off only the first path segment; the remainder (if any)
		// is handled by the recursive call at the bottom
		selStr := selector.(string)
		selSegs := strings.SplitN(selStr, PathSeparator, 2)
		thisSel := selSegs[0]
		index := -1
		var err error

		// https://github.com/stretchr/objx/issues/12
		if strings.Contains(thisSel, "[") {

			arrayMatches := arrayAccesRegex.FindStringSubmatch(thisSel)

			if len(arrayMatches) > 0 {

				// Get the key into the map
				thisSel = arrayMatches[1]

				// Get the index into the array at the key
				index, err = strconv.Atoi(arrayMatches[2])

				if err != nil {
					// This should never happen. If it does, something has gone
					// seriously wrong. Panic.
					panic("objx: Array index is not an integer. Must use array[int].")
				}

			}
		}

		// unwrap the Map alias so the type switch below matches
		if curMap, ok := current.(Map); ok {
			current = map[string]interface{}(curMap)
		}

		// get the object in question
		switch current.(type) {
		case map[string]interface{}:
			curMSI := current.(map[string]interface{})
			if len(selSegs) <= 1 && isSet {
				// last segment of a write: store and stop
				curMSI[thisSel] = value
				return nil
			} else {
				current = curMSI[thisSel]
			}
		default:
			current = nil
		}

		if current == nil && panics {
			panic(fmt.Sprintf("objx: '%v' invalid on object.", selector))
		}

		// do we need to access the item of an array?
		if index > -1 {
			if array, ok := current.([]interface{}); ok {
				if index < len(array) {
					current = array[index]
				} else {
					if panics {
						panic(fmt.Sprintf("objx: Index %d is out of range. Slice only contains %d items.", index, len(array)))
					}
					current = nil
				}
			}
		}

		// recurse into the remaining path segments
		if len(selSegs) > 1 {
			current = access(current, selSegs[1], value, isSet, panics)
		}

	}

	return current

}
|
||||
|
||||
// intFromInterface converts an interface holding any signed or unsigned
// integer type to an int via a type switch, panicking if the dynamic
// type is not an integer (callers guarantee it is).
func intFromInterface(selector interface{}) int {
	switch typed := selector.(type) {
	case int:
		return typed
	case int8:
		return int(typed)
	case int16:
		return int(typed)
	case int32:
		return int(typed)
	case int64:
		return int(typed)
	case uint:
		return int(typed)
	case uint8:
		return int(typed)
	case uint16:
		return int(typed)
	case uint32:
		return int(typed)
	case uint64:
		return int(typed)
	default:
		panic("objx: array access argument is not an integer type (this should never happen)")
	}
}
|
|
@ -1,145 +0,0 @@
|
|||
package objx
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestAccessorsAccessGetSingleField verifies a plain top-level key read.
func TestAccessorsAccessGetSingleField(t *testing.T) {

	current := map[string]interface{}{"name": "Tyler"}
	assert.Equal(t, "Tyler", access(current, "name", nil, false, true))

}
|
||||
// TestAccessorsAccessGetDeep verifies dot-separated nested map reads.
func TestAccessorsAccessGetDeep(t *testing.T) {

	current := map[string]interface{}{"name": map[string]interface{}{"first": "Tyler", "last": "Bunnell"}}
	assert.Equal(t, "Tyler", access(current, "name.first", nil, false, true))
	assert.Equal(t, "Bunnell", access(current, "name.last", nil, false, true))

}
|
||||
// TestAccessorsAccessGetDeepDeep verifies a four-level-deep path read.
func TestAccessorsAccessGetDeepDeep(t *testing.T) {

	current := map[string]interface{}{"one": map[string]interface{}{"two": map[string]interface{}{"three": map[string]interface{}{"four": 4}}}}
	assert.Equal(t, 4, access(current, "one.two.three.four", nil, false, true))

}
|
||||
// TestAccessorsAccessGetInsideArray verifies "key[i].field" reads and
// that out-of-range indexes panic when panics=true and yield nil when
// panics=false.
func TestAccessorsAccessGetInsideArray(t *testing.T) {

	current := map[string]interface{}{"names": []interface{}{map[string]interface{}{"first": "Tyler", "last": "Bunnell"}, map[string]interface{}{"first": "Capitol", "last": "Bollocks"}}}
	assert.Equal(t, "Tyler", access(current, "names[0].first", nil, false, true))
	assert.Equal(t, "Bunnell", access(current, "names[0].last", nil, false, true))
	assert.Equal(t, "Capitol", access(current, "names[1].first", nil, false, true))
	assert.Equal(t, "Bollocks", access(current, "names[1].last", nil, false, true))

	assert.Panics(t, func() {
		access(current, "names[2]", nil, false, true)
	})
	assert.Nil(t, access(current, "names[2]", nil, false, false))

}
|
||||
|
||||
// TestAccessorsAccessGetFromArrayWithInt verifies that an integer
// selector indexes a slice directly, returning nil past the end.
func TestAccessorsAccessGetFromArrayWithInt(t *testing.T) {

	current := []interface{}{map[string]interface{}{"first": "Tyler", "last": "Bunnell"}, map[string]interface{}{"first": "Capitol", "last": "Bollocks"}}
	one := access(current, 0, nil, false, false)
	two := access(current, 1, nil, false, false)
	three := access(current, 2, nil, false, false)

	assert.Equal(t, "Tyler", one.(map[string]interface{})["first"])
	assert.Equal(t, "Capitol", two.(map[string]interface{})["first"])
	assert.Nil(t, three)

}
|
||||
|
||||
// TestAccessorsGet verifies the public Map.Get wrapper around access.
func TestAccessorsGet(t *testing.T) {

	current := New(map[string]interface{}{"name": "Tyler"})
	assert.Equal(t, "Tyler", current.Get("name").data)

}
|
||||
|
||||
// TestAccessorsAccessSetSingleField verifies writing an existing key
// and adding a new one via access with isSet=true.
func TestAccessorsAccessSetSingleField(t *testing.T) {

	current := map[string]interface{}{"name": "Tyler"}
	access(current, "name", "Mat", true, false)
	assert.Equal(t, current["name"], "Mat")

	access(current, "age", 29, true, true)
	assert.Equal(t, current["age"], 29)

}
|
||||
|
||||
// TestAccessorsAccessSetSingleFieldNotExisting verifies a write into an
// empty map creates the key.
func TestAccessorsAccessSetSingleFieldNotExisting(t *testing.T) {

	current := map[string]interface{}{}
	access(current, "name", "Mat", true, false)
	assert.Equal(t, current["name"], "Mat")

}
|
||||
|
||||
// TestAccessorsAccessSetDeep verifies writes through a nested path.
func TestAccessorsAccessSetDeep(t *testing.T) {

	current := map[string]interface{}{"name": map[string]interface{}{"first": "Tyler", "last": "Bunnell"}}

	access(current, "name.first", "Mat", true, true)
	access(current, "name.last", "Ryer", true, true)

	assert.Equal(t, "Mat", access(current, "name.first", nil, false, true))
	assert.Equal(t, "Ryer", access(current, "name.last", nil, false, true))

}
|
||||
// TestAccessorsAccessSetDeepDeep verifies a four-level-deep write.
func TestAccessorsAccessSetDeepDeep(t *testing.T) {

	current := map[string]interface{}{"one": map[string]interface{}{"two": map[string]interface{}{"three": map[string]interface{}{"four": 4}}}}

	access(current, "one.two.three.four", 5, true, true)

	assert.Equal(t, 5, access(current, "one.two.three.four", nil, false, true))

}
|
||||
// TestAccessorsAccessSetArray verifies writing to an indexed element
// via the "key[i]" selector syntax.
func TestAccessorsAccessSetArray(t *testing.T) {

	current := map[string]interface{}{"names": []interface{}{"Tyler"}}

	access(current, "names[0]", "Mat", true, true)

	assert.Equal(t, "Mat", access(current, "names[0]", nil, false, true))

}
|
||||
// TestAccessorsAccessSetInsideArray verifies writes through combined
// array-index and field selectors ("key[i].field").
func TestAccessorsAccessSetInsideArray(t *testing.T) {

	current := map[string]interface{}{"names": []interface{}{map[string]interface{}{"first": "Tyler", "last": "Bunnell"}, map[string]interface{}{"first": "Capitol", "last": "Bollocks"}}}

	access(current, "names[0].first", "Mat", true, true)
	access(current, "names[0].last", "Ryer", true, true)
	access(current, "names[1].first", "Captain", true, true)
	access(current, "names[1].last", "Underpants", true, true)

	assert.Equal(t, "Mat", access(current, "names[0].first", nil, false, true))
	assert.Equal(t, "Ryer", access(current, "names[0].last", nil, false, true))
	assert.Equal(t, "Captain", access(current, "names[1].first", nil, false, true))
	assert.Equal(t, "Underpants", access(current, "names[1].last", nil, false, true))

}
|
||||
|
||||
// TestAccessorsAccessSetFromArrayWithInt exercises integer selectors on
// a raw slice; note it only performs reads (isSet=false), mirroring the
// Get variant of this test.
func TestAccessorsAccessSetFromArrayWithInt(t *testing.T) {

	current := []interface{}{map[string]interface{}{"first": "Tyler", "last": "Bunnell"}, map[string]interface{}{"first": "Capitol", "last": "Bollocks"}}
	one := access(current, 0, nil, false, false)
	two := access(current, 1, nil, false, false)
	three := access(current, 2, nil, false, false)

	assert.Equal(t, "Tyler", one.(map[string]interface{})["first"])
	assert.Equal(t, "Capitol", two.(map[string]interface{})["first"])
	assert.Nil(t, three)

}
|
||||
|
||||
// TestAccessorsSet verifies the public Map.Set wrapper around access.
func TestAccessorsSet(t *testing.T) {

	current := New(map[string]interface{}{"name": "Tyler"})
	current.Set("name", "Mat")
	assert.Equal(t, "Mat", current.Get("name").data)

}
|
14
pkg/api/Godeps/_workspace/src/github.com/stretchr/objx/codegen/array-access.txt
generated
vendored
14
pkg/api/Godeps/_workspace/src/github.com/stretchr/objx/codegen/array-access.txt
generated
vendored
|
@ -1,14 +0,0 @@
|
|||
case []{1}:
|
||||
a := object.([]{1})
|
||||
if isSet {
|
||||
a[index] = value.({1})
|
||||
} else {
|
||||
if index >= len(a) {
|
||||
if panics {
|
||||
panic(fmt.Sprintf("objx: Index %d is out of range because the []{1} only contains %d items.", index, len(a)))
|
||||
}
|
||||
return nil
|
||||
} else {
|
||||
return a[index]
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue