Merge pull request #446 from harshavardhana/pr_out_erasure_is_now_external_pull_it_and_update_from_minio_io_erasure

Harshavardhana 2015-04-05 23:53:19 -07:00
commit 70c6258440
56 changed files with 12 additions and 21158 deletions

Godeps/Godeps.json generated

@ -24,11 +24,11 @@
},
{
"ImportPath": "github.com/minio-io/donut",
"Rev": "5647e1e6c6a95caec431610a497b15f8298d56cf"
"Rev": "1cb5d3239ed989c4dd153af9931bcfb8ec4f0b87"
},
{
"ImportPath": "github.com/minio-io/erasure",
"Rev": "2a52bdad9b271ef680374a22f0cb68513a79ebf5"
"Rev": "8a72b14991a6835b4d30403e7cb201f373b7cb3a"
},
{
"ImportPath": "github.com/minio-io/iodine",


@ -12,7 +12,7 @@
},
{
"ImportPath": "github.com/minio-io/erasure",
"Rev": "3cece1a107115563682604b1430418e28f65dd80"
"Rev": "8a72b14991a6835b4d30403e7cb201f373b7cb3a"
},
{
"ImportPath": "github.com/minio-io/minio/pkg/utils/split",


@ -20,7 +20,7 @@ import (
"bytes"
"testing"
. "gopkg.in/check.v1"
. "github.com/minio-io/check"
)
type MySuite struct{}


@ -41,7 +41,7 @@
extern "C" {
#endif
#ifndef __unix__
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
@ -59,7 +59,7 @@ typedef unsigned char UINT8;
#endif
#ifdef __unix__
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else


@ -35,6 +35,7 @@ import (
// blocks. Data blocks are position and order dependent. Missing blocks
// are set to "nil". There must be at least "K" number of data|parity
// blocks.
//
// "dataLen" is the length of original source data
func (e *Erasure) Decode(encodedDataBlocks [][]byte, dataLen int) (decodedData []byte, err error) {
var source, target **C.uchar
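The added comment spells out the Decode contract: blocks are positional, missing blocks are passed as nil, at least "K" of the (k + m) blocks must be present, and dataLen is the length of the original data. A minimal round-trip sketch of that contract (an illustration only, not part of this change), assuming the ValidateParams, NewErasure and Encode helpers shown elsewhere in this diff:

```go
// Sketch: encode, lose two blocks, and recover the original data.
// Assumes the erasure package API shown elsewhere in this diff
// (ValidateParams, NewErasure, Encode, Decode, Cauchy).
package main

import (
	"bytes"
	"log"

	"github.com/minio-io/erasure"
)

func main() {
	data := []byte("hello, erasure coding")

	params, err := erasure.ValidateParams(10, 5, erasure.Cauchy) // k=10 data, m=5 parity
	if err != nil {
		log.Fatal(err)
	}
	e := erasure.NewErasure(params)

	chunks, err := e.Encode(data)
	if err != nil {
		log.Fatal(err)
	}

	// Blocks are position dependent, so lost blocks are set to nil
	// rather than removed from the slice.
	chunks[0] = nil
	chunks[3] = nil

	recovered, err := e.Decode(chunks, len(data))
	if err != nil {
		log.Fatal(err)
	}
	if !bytes.Equal(recovered, data) {
		log.Fatal("recovered data does not match original")
	}
}
```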


@ -19,7 +19,7 @@ package erasure
import (
"bytes"
. "gopkg.in/check.v1"
. "github.com/minio-io/check"
)
func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte {


@ -1,23 +0,0 @@
*.o
*.a
*.so
*~
*.dSYM
erasure-code-base-test
erasure-code-sse-test
erasure-code-test
gf-2vect-dot-prod-sse-test
gf-3vect-dot-prod-sse-test
gf-4vect-dot-prod-sse-test
gf-5vect-dot-prod-sse-test
gf-6vect-dot-prod-sse-test
gf-inverse-test
gf-vect-dot-prod-avx-test
gf-vect-dot-prod-base-test
gf-vect-dot-prod-sse-test
gf-vect-dot-prod-test
gf-vect-mul-avx-test
gf-vect-mul-base-test
gf-vect-mul-sse-test
gf-vect-mul-test
*.syso


@ -1,80 +0,0 @@
## Ubuntu (Kylin) 14.04
### Build Dependencies
This installation document assumes Ubuntu 14.04 or later on the x86-64 platform.
##### Install YASM
Erasure depends on the Intel ISA-L library. ISA-L uses Intel AVX2 processor instructions, so compiling these files requires a ``yasm`` that supports AVX2. AVX2 support only landed in ``yasm`` with version ``1.2.0``; any version below ``1.2.0`` will throw a build error.
```sh
$ sudo apt-get install yasm
```
##### Install Go 1.4+
Download Go 1.4+ from [https://golang.org/dl/](https://golang.org/dl/), extract it into ``${HOME}/local``, and set up ``${HOME}/mygo`` as your project workspace folder.
For example:
```sh
.... Extract and install golang ....
$ wget https://storage.googleapis.com/golang/go1.4.linux-amd64.tar.gz
$ mkdir -p ${HOME}/local
$ mkdir -p $HOME/mygo
$ tar -C ${HOME}/local -xzf go1.4.linux-amd64.tar.gz
.... Export necessary environment variables ....
$ export PATH=$PATH:${HOME}/local/go/bin
$ export GOROOT=${HOME}/local/go
$ export GOPATH=$HOME/mygo
$ export PATH=$PATH:$GOPATH/bin
.... Add paths to your bashrc ....
$ echo "export PATH=$PATH:${HOME}/local/go/bin" >> ${HOME}/.bashrc
$ echo "export GOROOT=${HOME}/local/go" >> ${HOME}/.bashrc
$ echo "export GOPATH=$HOME/mygo" >> ${HOME}/.bashrc
$ echo "export PATH=$PATH:$GOPATH/bin" >> ${HOME}/.bashrc
```
## Mac OSX (Yosemite) 10.10
### Build Dependencies
This installation document assumes Mac OS X Yosemite 10.10 or later on the x86-64 platform.
##### Install brew
```sh
$ ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
```
##### Install Git
```sh
$ brew install git
```
##### Install YASM
Erasure depends on the Intel ISA-L library. ISA-L uses Intel AVX2 processor instructions, so compiling these files requires a ``yasm`` that supports AVX2. AVX2 support only landed in ``yasm`` with version ``1.2.0``; any version below ``1.2.0`` will throw a build error.
```sh
$ brew install yasm
```
##### Install Go 1.4+
On Mac OS X, ``brew.sh`` is the best way to install golang.
For example:
```sh
.... Install golang using `brew` ....
$ brew install go
$ mkdir -p $HOME/mygo
.... Export necessary environment variables ....
$ export GOPATH=$HOME/mygo
$ export PATH=$PATH:$GOPATH/bin
.... Add paths to your bashrc ....
$ echo "export GOPATH=$HOME/mygo" >> ${HOME}/.bashrc
$ echo "export PATH=$PATH:$GOPATH/bin" >> ${HOME}/.bashrc
```


@ -1,30 +0,0 @@
### Setup your Erasure Github Repository
Fork the [Erasure upstream](https://github.com/minio-io/erasure/fork) source repository to your own personal repository. Copy its URL and clone it with git into your project workspace folder, as shown below.
```sh
$ git clone https://github.com/$USER_ID/erasure
$ cd erasure
$ mkdir -p ${GOPATH}/src/github.com/minio-io
$ ln -s ${PWD} $GOPATH/src/github.com/minio-io/
```
### Compiling Erasure from source
```sh
$ go generate
$ go build
```
### Developer Guidelines
To make the process as seamless as possible, we ask for the following:
* Go ahead and fork the project and make your changes. We encourage pull requests to discuss code changes.
- Fork it
- Create your feature branch (git checkout -b my-new-feature)
- Commit your changes (git commit -am 'Add some feature')
- Push to the branch (git push origin my-new-feature)
- Create new Pull Request
* When you're ready to create a pull request, be sure to:
- Have test cases for the new code. If you have questions about how to do it, please ask in your pull request.
- Run `go fmt`
- Squash your commits into a single commit. `git rebase -i`. It's okay to force update your pull request.
- Make sure `go test -race ./...` and `go build` complete.
* Read the [Effective Go](https://github.com/golang/go/wiki/CodeReviewComments) article from the Golang project
- The `Erasure` project is strictly conformant with Golang style
- If you happen to observe offending code, please feel free to send a pull request


@ -1,26 +0,0 @@
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@ -1,25 +0,0 @@
## Introduction
Erasure is an open source Golang library written on top of ISA-L (Intel Intelligent Storage Acceleration Library), released under the [Apache License v2](./LICENSE).
### Developers
* [Get Source](./CONTRIBUTING.md)
* [Build Dependencies](./BUILDDEPS.md)
* [Development Workflow](./CONTRIBUTING.md#developer-guidelines)
* [Developer discussions and bugs](https://github.com/Minio-io/erasure/issues)
### Supported platforms
| Name | Supported |
| ------------- | ------------- |
| Linux | Yes |
| Windows | Not yet |
| Mac OSX | Yes |
### Supported architectures
| Arch | Supported |
| ------------- | ------------- |
| x86-64 | Yes |
| arm64 | Not yet |
| i386 | Never |


@ -1,49 +0,0 @@
================================================================================
v2.10 Intel Intelligent Storage Acceleration Library Release Notes
Open Source Version
================================================================================
================================================================================
RELEASE NOTE CONTENTS
================================================================================
1. KNOWN ISSUES
2. FIXED ISSUES
3. CHANGE LOG & FEATURES ADDED
================================================================================
1. KNOWN ISSUES
================================================================================
* Only erasure code unit included in open source version at this time.
* Perf tests do not run in Windows environment.
* Leaving <unit>/bin directories from builds in unit directories will cause the
top-level make build to fail. Build only in top-level or ensure unit
directories are clean of objects and /bin.
* 32-bit lib is not supported in Windows.
================================================================================
2. FIXED ISSUES
================================================================================
v2.10
* Fix for windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only
affects windows versions of erasure code. GP register saves/restore were
pushed to same stack area as XMM.
================================================================================
3. CHANGE LOG & FEATURES ADDED
================================================================================
v2.10
* Erasure code updates
- New AVX and AVX2 support functions.
- Changes min len requirement on gf_vect_dot_prod() to 32 from 16.
- Tests include both source and parity recovery with ec_encode_data().
- New encoding examples with Vandermonde or Cauchy matrix.
v2.8
* First open release of erasure code unit that is part of ISA-L.


@ -1,3 +0,0 @@
v1.0 - Erasure Golang Package
============================
- First release, supports only amd64 or x86-64 architecture


@ -1,71 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
import (
"bytes"
"testing"
. "github.com/minio-io/check"
)
type MySuite struct{}
var _ = Suite(&MySuite{})
func Test(t *testing.T) { TestingT(t) }
const (
k = 10
m = 5
)
func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) {
ep, _ := ValidateParams(k, m, Cauchy)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
e := NewErasure(ep)
chunks, err := e.Encode(data)
c.Assert(err, IsNil)
errorIndex := []int{0, 3, 5, 9, 11, 13}
chunks = corruptChunks(chunks, errorIndex)
_, err = e.Decode(chunks, len(data))
c.Assert(err, Not(IsNil))
}
func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) {
ep, _ := ValidateParams(k, m, Cauchy)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
e := NewErasure(ep)
chunks, err := e.Encode(data)
c.Assert(err, IsNil)
errorIndex := []int{0, 3, 5, 9, 13}
chunks = corruptChunks(chunks, errorIndex)
recoveredData, err := e.Decode(chunks, len(data))
c.Assert(err, IsNil)
if !bytes.Equal(data, recoveredData) {
c.Fatalf("Recovered data mismatches with original data")
}
}


@ -1,59 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
// #include <stdint.h>
import "C"
import (
"fmt"
"unsafe"
)
// intSlice2CIntArray converts Go int slice to C int array
func intSlice2CIntArray(srcErrList []int) *C.int32_t {
var sizeErrInt = int(unsafe.Sizeof(srcErrList[0]))
switch sizeInt {
case sizeErrInt:
return (*C.int32_t)(unsafe.Pointer(&srcErrList[0]))
case sizeInt8:
int8Array := make([]int8, len(srcErrList))
for i, v := range srcErrList {
int8Array[i] = int8(v)
}
return (*C.int32_t)(unsafe.Pointer(&int8Array[0]))
case sizeInt16:
int16Array := make([]int16, len(srcErrList))
for i, v := range srcErrList {
int16Array[i] = int16(v)
}
return (*C.int32_t)(unsafe.Pointer(&int16Array[0]))
case sizeInt32:
int32Array := make([]int32, len(srcErrList))
for i, v := range srcErrList {
int32Array[i] = int32(v)
}
return (*C.int32_t)(unsafe.Pointer(&int32Array[0]))
case sizeInt64:
int64Array := make([]int64, len(srcErrList))
for i, v := range srcErrList {
int64Array[i] = int64(v)
}
return (*C.int32_t)(unsafe.Pointer(&int64Array[0]))
default:
panic(fmt.Sprintf("Unsupported: %d", sizeInt))
}
}


@ -1,67 +0,0 @@
// Package erasure is a Go wrapper for the Intel Intelligent Storage
// Acceleration Library (Intel ISA-L). Intel ISA-L is a CPU optimized
// implementation of erasure coding algorithms.
//
// For more information on Intel ISA-L, please visit:
// https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
//
// Usage:
//
// Encode encodes a block of data. The input is the original data and the
// output is (k + m) chunks of erasure-encoded data.
//
// Decode takes the (k + m) encoded chunks and decodes them back into the
// original form. The length of the original data must also be provided as input.
//
// Decoded data is identical in length and content to the original data.
//
// Encoding data may be performed in 3 steps.
//
// 1. Create a validated set of encoder parameters
// 2. Create a new encoder
// 3. Encode data
//
// Decoding data is also performed in 3 steps.
//
// 1. Create a validated set of encoder parameters
// 2. Create a new encoder
// 3. Decode data
//
// Erasure parameters contain three configurable elements:
// ValidateParams(k, m, technique int) (ErasureParams, error)
// k - Number of rows in matrix
// m - Number of columns in matrix
// technique - Matrix type, can be either Cauchy (recommended) or Vandermonde
// constraints: k + m < Galois Field (2^8)
//
// Choosing the right parity and matrix technique is left to the application.
//
// But here are a few points to keep in mind:
//
// Techniques:
// - Vandermonde is the most commonly used method for choosing coefficients in
// erasure encoding, but it does not guarantee that every sub-matrix is
// invertible. Users may want to adjust for k > 5 (k is the number of data blocks).
// - Cauchy is our recommended method for choosing coefficients in erasure
// coding, since any sub-matrix of a Cauchy matrix is invertible.
//
// Total blocks:
// - Data blocks and Parity blocks should not be greater than 'Galois Field' (2^8)
//
// Example
//
// Creating and using an encoder
// var data []byte
// params, err := erasure.ValidateParams(10, 5, erasure.Cauchy)
// encoder := erasure.NewErasure(params)
// encodedData, err := encoder.Encode(data)
//
// Creating and using a decoder
// var encodedData [][]byte
// var length int // length of the original data
// params, err := erasure.ValidateParams(10, 5, erasure.Cauchy)
// encoder := erasure.NewErasure(params)
// originalData, err := encoder.Decode(encodedData, length)
//
package erasure
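As a companion to the package documentation above, here is a small sketch of the parameter-validation step it describes. This is an illustration only, assuming the documented constraint k + m < 2^8 and the Cauchy and Vandermonde technique constants named in the doc comment:

```go
// Sketch: validating erasure parameters before building an encoder.
// Assumes ValidateParams enforces the documented k + m < 2^8 constraint.
package main

import (
	"fmt"

	"github.com/minio-io/erasure"
)

func main() {
	// Recommended configuration: Cauchy, since every sub-matrix of a
	// Cauchy matrix is invertible.
	if _, err := erasure.ValidateParams(10, 5, erasure.Cauchy); err != nil {
		fmt.Println("k=10, m=5 rejected:", err)
	}

	// k + m here exceeds the Galois Field size (2^8), so per the
	// documented constraint this combination should fail validation.
	if _, err := erasure.ValidateParams(200, 100, erasure.Vandermonde); err != nil {
		fmt.Println("k=200, m=100 rejected:", err)
	}
}
```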

File diff suppressed because it is too large.


@ -1,321 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <limits.h>
#include <stdint.h>
#include <string.h> // for memset
#include "ec-code.h"
#include "ec-base.h" // for GF tables
#include "ec-ctypes.h"
uint8_t gf_mul(uint8_t a, uint8_t b)
{
#ifndef GF_LARGE_TABLES
int i;
if ((a == 0) || (b == 0))
return 0;
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
#else
return gf_mul_table_base[b * 256 + a];
#endif
}
uint8_t gf_inv(uint8_t a)
{
#ifndef GF_LARGE_TABLES
if (a == 0)
return 0;
return gff_base[255 - gflog_base[a]];
#else
return gf_inv_table_base[a];
#endif
}
void gf_gen_rs_matrix(uint8_t *a, int m, int k)
{
int i, j;
uint8_t p, gen = 1;
memset(a, 0, k * m);
for (i = 0; i < k; i++)
a[k * i + i] = 1;
for (i = k; i < m; i++) {
p = 1;
for (j = 0; j < k; j++) {
a[k * i + j] = p;
p = gf_mul(p, gen);
}
gen = gf_mul(gen, 2);
}
}
void gf_gen_cauchy1_matrix(uint8_t *a, int m, int k)
{
int i, j;
uint8_t *p;
// Identity matrix in high position
memset(a, 0, k * m);
for (i = 0; i < k; i++)
a[k * i + i] = 1;
// For the rest choose 1/(i + j) | i != j
p = &a[k * k];
for (i = k; i < m; i++)
for (j = 0; j < k; j++)
*p++ = gf_inv(i ^ j);
}
int gf_invert_matrix(uint8_t *in_mat, uint8_t *out_mat, const int n)
{
int i, j, k;
uint8_t temp;
// Set out_mat[] to the identity matrix
for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n)
out_mat[i] = 0;
for (i = 0; i < n; i++)
out_mat[i * n + i] = 1;
// Inverse
for (i = 0; i < n; i++) {
// Check for 0 in pivot element
if (in_mat[i * n + i] == 0) {
// Find a row with non-zero in current column and swap
for (j = i + 1; j < n; j++)
if (in_mat[j * n + i])
break;
if (j == n) // Couldn't find means it's singular
return -1;
for (k = 0; k < n; k++) { // Swap rows i,j
temp = in_mat[i * n + k];
in_mat[i * n + k] = in_mat[j * n + k];
in_mat[j * n + k] = temp;
temp = out_mat[i * n + k];
out_mat[i * n + k] = out_mat[j * n + k];
out_mat[j * n + k] = temp;
}
}
temp = gf_inv(in_mat[i * n + i]); // 1/pivot
for (j = 0; j < n; j++) { // Scale row i by 1/pivot
in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
}
for (j = 0; j < n; j++) {
if (j == i)
continue;
temp = in_mat[j * n + i];
for (k = 0; k < n; k++) {
out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
}
}
}
return 0;
}
// Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
void gf_vect_mul_init(uint8_t c, uint8_t *tbl)
{
uint8_t c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2}
uint8_t c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2}
uint8_t c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2}
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
unsigned long long v1, v2, v4, v8, *t;
unsigned long long v10, v20, v40, v80;
uint8_t c17, c18, c20, c24;
t = (unsigned long long *)tbl;
v1 = c * 0x0100010001000100ull;
v2 = c2 * 0x0101000001010000ull;
v4 = c4 * 0x0101010100000000ull;
v8 = c8 * 0x0101010101010101ull;
v4 = v1 ^ v2 ^ v4;
t[0] = v4;
t[1] = v8 ^ v4;
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
v10 = c17 * 0x0100010001000100ull;
v20 = c18 * 0x0101000001010000ull;
v40 = c20 * 0x0101010100000000ull;
v80 = c24 * 0x0101010101010101ull;
v40 = v10 ^ v20 ^ v40;
t[2] = v40;
t[3] = v80 ^ v40;
#else // 32-bit or other
uint8_t c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
uint8_t c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30,
c31;
c3 = c2 ^ c;
c5 = c4 ^ c;
c6 = c4 ^ c2;
c7 = c4 ^ c3;
c9 = c8 ^ c;
c10 = c8 ^ c2;
c11 = c8 ^ c3;
c12 = c8 ^ c4;
c13 = c8 ^ c5;
c14 = c8 ^ c6;
c15 = c8 ^ c7;
tbl[0] = 0;
tbl[1] = c;
tbl[2] = c2;
tbl[3] = c3;
tbl[4] = c4;
tbl[5] = c5;
tbl[6] = c6;
tbl[7] = c7;
tbl[8] = c8;
tbl[9] = c9;
tbl[10] = c10;
tbl[11] = c11;
tbl[12] = c12;
tbl[13] = c13;
tbl[14] = c14;
tbl[15] = c15;
c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c19 = c18 ^ c17;
c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c21 = c20 ^ c17;
c22 = c20 ^ c18;
c23 = c20 ^ c19;
c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2}
c25 = c24 ^ c17;
c26 = c24 ^ c18;
c27 = c24 ^ c19;
c28 = c24 ^ c20;
c29 = c24 ^ c21;
c30 = c24 ^ c22;
c31 = c24 ^ c23;
tbl[16] = 0;
tbl[17] = c17;
tbl[18] = c18;
tbl[19] = c19;
tbl[20] = c20;
tbl[21] = c21;
tbl[22] = c22;
tbl[23] = c23;
tbl[24] = c24;
tbl[25] = c25;
tbl[26] = c26;
tbl[27] = c27;
tbl[28] = c28;
tbl[29] = c29;
tbl[30] = c30;
tbl[31] = c31;
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
}
void gf_vect_dot_prod_base(int len, int vlen, uint8_t *v,
uint8_t **src, uint8_t *dest)
{
int i, j;
uint8_t s;
for (i = 0; i < len; i++) {
s = 0;
for (j = 0; j < vlen; j++)
s ^= gf_mul(src[j][i], v[j * 32 + 1]);
dest[i] = s;
}
}
void ec_encode_data_base(int len, int srcs, int dests, uint8_t *v,
uint8_t **src, uint8_t **dest)
{
int i, j, l;
uint8_t s;
for (l = 0; l < dests; l++) {
for (i = 0; i < len; i++) {
s = 0;
for (j = 0; j < srcs; j++)
s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]);
dest[l][i] = s;
}
}
}
void gf_vect_mul_base(int len, uint8_t *a, uint8_t *src, uint8_t *dest)
{
//2nd element of table array is ref value used to fill it in
uint8_t c = a[1];
while (len-- > 0)
*dest++ = gf_mul(c, *src++);
}
struct slver {
UINT16 snum;
UINT8 ver;
UINT8 core;
};
// Version info
struct slver gf_vect_mul_init_slver_00020035;
struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
struct slver ec_encode_data_base_slver_00010135;
struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
struct slver gf_vect_mul_base_slver_00010136;
struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
struct slver gf_vect_dot_prod_base_slver_00010137;
struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
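For readers following the base routines above without the C, here is a small pure-Go sketch of the same GF(2^8) arithmetic: log/antilog tables built with the 0x1d overflow reduction used in gf_vect_mul_init, a gf_mul equivalent, and the gf_vect_dot_prod_base loop (the C version reads each coefficient from element 32*j + 1 of its expanded table). Illustration only, not part of ISA-L:

```go
// Illustrative pure-Go translation of gf_mul and gf_vect_dot_prod_base
// from ec-base.c above.
package main

import "fmt"

var gflog, gfexp [256]byte

func init() {
	x := byte(1)
	for i := 0; i < 255; i++ {
		gfexp[i] = x
		gflog[x] = byte(i)
		// Multiply x by 2 in GF(2^8): shift, then reduce with 0x1d on overflow.
		carry := x & 0x80
		x <<= 1
		if carry != 0 {
			x ^= 0x1d
		}
	}
}

// gfMul mirrors gf_mul: a*b = antilog(log a + log b), wrapping at 255.
func gfMul(a, b byte) byte {
	if a == 0 || b == 0 {
		return 0
	}
	s := int(gflog[a]) + int(gflog[b])
	if s > 254 {
		s -= 255
	}
	return gfexp[s]
}

// dotProd mirrors gf_vect_dot_prod_base: dest[i] is the GF(2^8) dot
// product of column i of the sources with the coefficient vector.
func dotProd(coeffs []byte, src [][]byte, dest []byte) {
	for i := range dest {
		var s byte
		for j := range src {
			s ^= gfMul(src[j][i], coeffs[j])
		}
		dest[i] = s
	}
}

func main() {
	src := [][]byte{{1, 2, 3, 4}, {5, 6, 7, 8}}
	coeffs := []byte{0x0a, 0x1d}
	dest := make([]byte, 4)
	dotProd(coeffs, src, dest)
	fmt.Printf("%x\n", dest)
}
```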

File diff suppressed because it is too large.


@ -1,660 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef _ERASURE_CODE_H_
#define _ERASURE_CODE_H_
#include <stdint.h>
/**
* @file erasure_code.h
* @brief Interface to functions supporting erasure code encode and decode.
*
* This file defines the interface to optimized functions used in erasure
* codes. Encode and decode of erasures in GF(2^8) are made by calculating the
* dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
* set of coefficients. Values for the coefficients are determined by the type
* of erasure code. Using a general dot product means that any sequence of
* coefficients may be used including erasure codes based on random
* coefficients.
* Multiple versions of dot product are supplied to calculate 1-6 output
* vectors in one pass.
* Base GF multiply and divide functions can be sped up by defining
* GF_LARGE_TABLES at the expense of memory size.
*
*/
#include "ec-vect-mul.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Initialize tables for fast Erasure Code encode and decode.
*
* Generates the expanded tables needed for fast encode or decode for erasure
* codes on blocks of data. 32 bytes are generated for each input coefficient.
*
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param a Pointer to sets of arrays of input coefficients used to encode
* or decode data.
* @param gftbls Pointer to start of space for concatenated output tables
* generated from input coefficients. Must be of size 32*k*rows.
* @returns none
*/
void ec_init_tables(int k, int rows, uint8_t* a, uint8_t* gftbls);
/**
* @brief Generate or decode erasure codes on blocks of data.
*
* Given a list of source data blocks, generate one or multiple blocks of
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
* suitable set of coefficients, this function will perform the fast generation
* or decoding of Reed-Solomon type erasure codes.
*
* @requires SSE4.1
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param gftbls Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param data Array of pointers to source input buffers.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
void ec_encode_data_sse(int len, int k, int rows, uint8_t *gftbls, uint8_t **data, uint8_t **coding);
/**
* @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
*
* Given a list of source data blocks, generate one or multiple blocks of
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
* suitable set of coefficients, this function will perform the fast generation
* or decoding of Reed-Solomon type erasure codes.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param gftbls Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param data Array of pointers to source input buffers.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
void ec_encode_data(int len, int k, int rows, uint8_t *gftbls, uint8_t **data, uint8_t **coding);
/**
* @brief Generate or decode erasure codes on blocks of data, runs baseline version.
*
* Given a list of source data blocks, generate one or multiple blocks of
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
* suitable set of coefficients, this function will perform the fast generation
* or decoding of Reed-Solomon type erasure codes.
*
* @param len Length of each block of data (vector) of source or dest data.
* @param srcs The number of vector sources or rows in the generator matrix
* for coding.
* @param dests The number of output vectors to concurrently encode/decode.
* @param v Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param src Array of pointers to source input buffers.
* @param dest Array of pointers to coded output buffers.
* @returns none
*/
void ec_encode_data_base(int len, int srcs, int dests, uint8_t *v, uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product.
*
* Does a GF(2^8) dot product across each byte of the input array and a constant
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
* on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Pointer to destination data array.
* @returns none
*/
void gf_vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t *dest);
/**
* @brief GF(2^8) vector dot product.
*
* Does a GF(2^8) dot product across each byte of the input array and a constant
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
* on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Pointer to destination data array.
* @returns none
*/
void gf_vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t *dest);
/**
* @brief GF(2^8) vector dot product.
*
* Does a GF(2^8) dot product across each byte of the input array and a constant
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
* on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Pointer to destination data array.
* @returns none
*/
void gf_vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t *dest);
/**
* @brief GF(2^8) vector dot product with two outputs.
*
* Vector dot product optimized to calculate two outputs at a time. Does two
* GF(2^8) dot products across each byte of the input array and two constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_2vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with two outputs.
*
* Vector dot product optimized to calculate two outputs at a time. Does two
* GF(2^8) dot products across each byte of the input array and two constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_2vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with two outputs.
*
* Vector dot product optimized to calculate two outputs at a time. Does two
* GF(2^8) dot products across each byte of the input array and two constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_2vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with three outputs.
*
* Vector dot product optimized to calculate three outputs at a time. Does three
* GF(2^8) dot products across each byte of the input array and three constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_3vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with three outputs.
*
* Vector dot product optimized to calculate three outputs at a time. Does three
* GF(2^8) dot products across each byte of the input array and three constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_3vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with three outputs.
*
* Vector dot product optimized to calculate three outputs at a time. Does three
* GF(2^8) dot products across each byte of the input array and three constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_3vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with four outputs.
*
* Vector dot product optimized to calculate four outputs at a time. Does four
* GF(2^8) dot products across each byte of the input array and four constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_4vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with four outputs.
*
* Vector dot product optimized to calculate four outputs at a time. Does four
* GF(2^8) dot products across each byte of the input array and four constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_4vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with four outputs.
*
* Vector dot product optimized to calculate four outputs at a time. Does four
* GF(2^8) dot products across each byte of the input array and four constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_4vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with five outputs.
*
* Vector dot product optimized to calculate five outputs at a time. Does five
* GF(2^8) dot products across each byte of the input array and five constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_5vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with five outputs.
*
* Vector dot product optimized to calculate five outputs at a time. Does five
* GF(2^8) dot products across each byte of the input array and five constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_5vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with five outputs.
*
* Vector dot product optimized to calculate five outputs at a time. Does five
* GF(2^8) dot products across each byte of the input array and five constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_5vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with six outputs.
*
* Vector dot product optimized to calculate six outputs at a time. Does six
* GF(2^8) dot products across each byte of the input array and six constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
*
* @requires SSE4.1
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_6vect_dot_prod_sse(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with six outputs.
*
* Vector dot product optimized to calculate six outputs at a time. Does six
* GF(2^8) dot products across each byte of the input array and six constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
*
* @requires AVX
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_6vect_dot_prod_avx(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product with six outputs.
*
* Vector dot product optimized to calculate six outputs at a time. Does six
* GF(2^8) dot products across each byte of the input array and six constant
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
*
* @requires AVX2
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
* based on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Array of pointers to destination data buffers.
* @returns none
*/
void gf_6vect_dot_prod_avx2(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t **dest);
/**
* @brief GF(2^8) vector dot product, runs baseline version.
*
* Does a GF(2^8) dot product across each byte of the input array and a constant
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
*
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
* on the array of input coefficients. Only elements 32*CONST*j + 1
* of this array are used, where j = (0, 1, 2...) and CONST is the
* number of elements in the array of input coefficients. The
* elements used correspond to the original input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Pointer to destination data array.
* @returns none
*/
void gf_vect_dot_prod_base(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t *dest);
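/*
 * A minimal reference sketch of the semantics described above, not the
 * optimized library routine. It assumes coefficient j is stored at
 * gftbls[j*32 + 1], as documented, and that GF(2^8) uses the usual 0x1d
 * reduction; gf_mul_ref() is a hypothetical helper added for illustration.
 */
static unsigned char gf_mul_ref(unsigned char a, unsigned char b)
{
	unsigned char p = 0;
	int i;
	for (i = 0; i < 8; i++) {
		if (b & 1)
			p ^= a;
		/* shift a, reducing by x^8 + x^4 + x^3 + x^2 + 1 on overflow */
		a = (a & 0x80) ? (unsigned char)((a << 1) ^ 0x1d)
			       : (unsigned char)(a << 1);
		b >>= 1;
	}
	return p;
}
static void gf_vect_dot_prod_ref(int len, int vlen, unsigned char *gftbls,
				 unsigned char **src, unsigned char *dest)
{
	int i, j;
	for (i = 0; i < len; i++) {
		unsigned char s = 0;
		for (j = 0; j < vlen; j++)
			s ^= gf_mul_ref(gftbls[j * 32 + 1], src[j][i]);
		dest[i] = s;	/* dest[i] = XOR over j of coeff_j * src[j][i] */
	}
}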
/**
* @brief GF(2^8) vector dot product, runs appropriate version.
*
* Does a GF(2^8) dot product across each byte of the input array and a constant
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
* on the array of input coefficients.
* @param src Array of pointers to source inputs.
* @param dest Pointer to destination data array.
* @returns none
*/
void gf_vect_dot_prod(int len, int vlen, uint8_t *gftbls,
uint8_t **src, uint8_t *dest);
/**********************************************************************
* The remaining functions are library support routines used in GF(2^8) operations.
*/
/**
* @brief Single element GF(2^8) multiply.
*
* @param a Multiplicand a
* @param b Multiplicand b
* @returns Product of a and b in GF(2^8)
*/
uint8_t gf_mul(uint8_t a, uint8_t b);
/**
* @brief Single element GF(2^8) inverse.
*
* @param a Input element
* @returns Field element b such that a x b = {1}
*/
uint8_t gf_inv(uint8_t a);
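/*
 * A hedged usage sketch of the two helpers above: for any non-zero field
 * element a, gf_inv(a) is expected to be the multiplicative inverse, so the
 * product with gf_mul() is 1. check_inverse() is illustrative only.
 */
static int check_inverse(uint8_t a)
{
	return a != 0 && gf_mul(a, gf_inv(a)) == 1;
}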
/**
* @brief Generate a matrix of coefficients to be used for encoding.
*
* Vandermonde matrix example of encoding coefficients where high portion of
* matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
* i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
* erasure encoding, but it does not guarantee an invertible decode matrix for
* every sub-matrix. For large k it is possible to find cases where the decode
* matrix chosen from sources and parity not in erasure is not invertible.
* Users may want to adjust for k > 5.
*
* @param a [mxk] array to hold coefficients
* @param m number of rows in matrix corresponding to srcs + parity.
* @param k number of columns in matrix corresponding to srcs.
* @returns none
*/
void gf_gen_rs_matrix(uint8_t *a, int m, int k);
/**
* @brief Generate a Cauchy matrix of coefficients to be used for encoding.
*
* Cauchy matrix example of encoding coefficients where high portion of matrix
* is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
* i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible.
*
* @param a [mxk] array to hold coefficients
* @param m number of rows in matrix corresponding to srcs + parity.
* @param k number of columns in matrix corresponding to srcs.
* @returns none
*/
void gf_gen_cauchy1_matrix(uint8_t *a, int m, int k);
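/*
 * An illustrative sketch only, assuming k = 10 data and m = 3 parity blocks:
 * generate a (k+m) x k Cauchy coefficient matrix, whose top k rows form the
 * identity as described above.
 */
static void example_gen_matrix(void)
{
	uint8_t a[(10 + 3) * 10];	/* (k + m) rows, k columns */
	gf_gen_cauchy1_matrix(a, 10 + 3, 10);
	/* rows 10..12 now hold the coding coefficients for the parity blocks */
}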
/**
* @brief Invert a matrix in GF(2^8)
*
* @param in input matrix
* @param out output matrix such that [in] x [out] = [I] - identity matrix
* @param n size of matrix [nxn]
* @returns 0 on success, non-zero on failure (singular input matrix)
*/
int gf_invert_matrix(uint8_t *in, uint8_t *out, const int n);
/*************************************************************/
#ifdef __cplusplus
}
#endif
#endif //_ERASURE_CODE_H_

View File

@ -1,39 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __COMMON_H__
#define __COMMON_H__
#include <stdint.h>
int32_t minio_init_encoder (int technique, int k, int m,
uint8_t **encode_matrix,
uint8_t **encode_tbls);
int32_t minio_init_decoder (int32_t *error_index,
int k, int n, int errs,
uint8_t *encoding_matrix,
uint8_t **decode_matrix,
uint8_t **decode_tbls,
uint32_t **decode_index);
int32_t minio_get_source_target (int errs, int k, int m,
int32_t *error_index,
uint32_t *decode_index,
uint8_t **buffs,
uint8_t ***source,
uint8_t ***target);
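/*
 * A hedged call-order sketch for the declarations above; k = 10, m = 3 and
 * technique 1 (Cauchy) are illustrative values, and the wrapper name is not
 * part of the original interface.
 */
static inline int32_t example_init_encoder(uint8_t **matrix, uint8_t **tbls)
{
	/* builds the (k+m) x k encode matrix and its expanded lookup tables */
	return minio_init_encoder(1, 10, 3, matrix, tbls);
}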
#endif /* __COMMON_H__ */

View File

@ -1,80 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
/**
* @file types.h
* @brief Defines standard width types.
*
*/
#ifndef __ERASURE_TYPES_H
#define __ERASURE_TYPES_H
#ifdef __cplusplus
extern "C" {
#endif
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
typedef unsigned __int64 UINT64;
typedef __int64 INT64;
typedef unsigned __int32 UINT32;
typedef unsigned __int16 UINT16;
typedef unsigned char UINT8;
#else
typedef unsigned long int UINT64;
typedef long int INT64;
typedef unsigned int UINT32;
typedef unsigned short int UINT16;
typedef unsigned char UINT8;
#endif
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else
# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
#endif
#ifdef DEBUG
# define DEBUG_PRINT(x) printf x
#else
# define DEBUG_PRINT(x) do {} while (0)
#endif
#ifdef __cplusplus
}
#endif
#endif //__ERASURE_TYPES_H

View File

@ -1,134 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ec-code.h"
#include "ec-common.h"
static
int32_t _minio_src_index_in_error (int r, int32_t *error_index)
{
int i;
for (i = 0; error_index[i] != -1; i++) {
if (error_index[i] == r) {
// true
return 1;
}
}
// false
return 0;
}
// Separate out source data and target buffers
int32_t minio_get_source_target (int errs, int k, int m,
int32_t *error_index,
uint32_t *decode_index,
uint8_t **buffs,
uint8_t ***source,
uint8_t ***target)
{
int i;
uint8_t *tmp_source[k];
uint8_t *tmp_target[m];
if (k < 0 || m < 0) {
return -1;
}
memset (tmp_source, 0, k);
memset (tmp_target, 0, m);
for (i = 0; i < k; i++) {
tmp_source[i] = (uint8_t *) buffs[decode_index[i]];
}
for (i = 0; i < m; i++) {
if (i < errs)
tmp_target[i] = (uint8_t *) buffs[error_index[i]];
}
*source = tmp_source;
*target = tmp_target;
return 0;
}
/*
Generate decode matrix during the decoding phase
*/
int minio_init_decoder (int32_t *error_index,
int k, int n, int errs,
uint8_t *encode_matrix,
uint8_t **decode_matrix,
uint8_t **decode_tbls,
uint32_t **decode_index)
{
int i, j, r, s, l, z;
uint8_t input_matrix[k * n];
uint8_t inverse_matrix[k * n];
uint8_t tmp_decode_matrix[k * n];
uint8_t tmp_decode_tbls[k * n * 32];
uint32_t tmp_decode_index[k];
for (i = 0, r = 0; i < k; i++, r++) {
while (_minio_src_index_in_error(r, error_index))
r++;
for (j = 0; j < k; j++) {
input_matrix[k * i + j] = encode_matrix[k * r + j];
}
tmp_decode_index[i] = r;
}
// Not all Vandermonde matrices can be inverted
if (gf_invert_matrix(input_matrix, inverse_matrix, k) < 0) {
return -1;
}
for (l = 0; l < errs; l++) {
if (error_index[l] < k) {
// decoding matrix elements for data chunks
for (j = 0; j < k; j++) {
tmp_decode_matrix[k * l + j] =
inverse_matrix[k *
error_index[l] + j];
}
} else {
int s = 0;
// decoding matrix element for coding chunks
for (i = 0; i < k; i++) {
s = 0;
for (j = 0; j < k; j++) {
s ^= gf_mul(inverse_matrix[j * k + i],
encode_matrix[k *
error_index[l] + j]);
}
tmp_decode_matrix[k * l + i] = s;
}
}
}
ec_init_tables (k, errs, tmp_decode_matrix, tmp_decode_tbls);
*decode_matrix = tmp_decode_matrix;
*decode_tbls = tmp_decode_tbls;
*decode_index = tmp_decode_index;
return 0;
}

View File

@ -1,59 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <stdio.h>
#include "ec-code.h"
#include "ec-common.h"
/*
Generate encode matrix during the encoding phase
*/
int32_t minio_init_encoder (int technique, int k, int m,
uint8_t **encode_matrix,
uint8_t **encode_tbls)
{
size_t encode_matrix_size;
size_t encode_tbls_size;
uint8_t *tmp_matrix;
uint8_t *tmp_tbls;
tmp_matrix = (uint8_t *) malloc (k * (k + m));
tmp_tbls = (uint8_t *) malloc (k * (k + m) * 32);
if (technique == 0) {
/*
Commonly used method for choosing coefficients in erasure
encoding, but it does not guarantee an invertible matrix for
every sub-matrix. For large k it is possible to find cases
where the decode matrix chosen from sources and parity not
in erasure is not invertible. Users may want to adjust for k > 5.
-- Intel
*/
gf_gen_rs_matrix (tmp_matrix, k + m, k);
} else if (technique == 1) {
gf_gen_cauchy1_matrix (tmp_matrix, k + m, k);
}
ec_init_tables(k, m, &tmp_matrix[k * k], tmp_tbls);
*encode_matrix = tmp_matrix;
*encode_tbls = tmp_tbls;
return 0;
}

View File

@ -1,153 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <limits.h>
#include <stdint.h>
#include "ec-code.h"
#include "ec-ctypes.h"
void ec_init_tables(int k, int rows, uint8_t *a, uint8_t *g_tbls)
{
int i, j;
for (i = 0; i < rows; i++) {
for (j = 0; j < k; j++) {
gf_vect_mul_init(*a++, g_tbls);
g_tbls += 32;
}
}
}
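/*
 * Layout sketch for the loop above: for "rows" outputs and "k" sources,
 * g_tbls must hold rows * k * 32 bytes; the 32-byte group expanded from
 * coefficient a[i*k + j] starts at g_tbls + (i*k + j) * 32, matching the
 * order in which the loops walk the matrix.
 */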
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
void ec_encode_data_sse(int len, int k, int rows, uint8_t *g_tbls, uint8_t **data,
uint8_t **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
while (rows >= 4) {
gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
rows -= 4;
}
switch (rows) {
case 3:
gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
break;
case 0:
break;
}
}
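/*
 * Dispatch sketch: parity rows are produced four at a time with the 4-vector
 * dot-product kernels, any remainder of 1-3 rows falls through to the smaller
 * kernels, and inputs shorter than the SIMD width use the base version.
 */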
void ec_encode_data_avx(int len, int k, int rows, uint8_t *g_tbls, uint8_t **data,
uint8_t **coding)
{
if (len < 16) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
while (rows >= 4) {
gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
rows -= 4;
}
switch (rows) {
case 3:
gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
break;
case 0:
break;
}
}
void ec_encode_data_avx2(int len, int k, int rows, uint8_t *g_tbls, uint8_t **data,
uint8_t **coding)
{
if (len < 32) {
ec_encode_data_base(len, k, rows, g_tbls, data, coding);
return;
}
while (rows >= 4) {
gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
g_tbls += 4 * k * 32;
coding += 4;
rows -= 4;
}
switch (rows) {
case 3:
gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
break;
case 2:
gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
break;
case 1:
gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
break;
case 0:
break;
}
}
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
struct slver {
UINT16 snum;
UINT8 ver;
UINT8 core;
};
// Version info
struct slver ec_init_tables_slver_00010068;
struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };
struct slver ec_encode_data_sse_slver_00020069;
struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };

View File

@ -1,302 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT wrt ..plt
%else
%define WRT_OPT
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE _gf_vect_mul_base
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
%define EC_ENCODE_DATA _ec_encode_data
%define GF_VECT_MUL _gf_vect_mul
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
%else
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%define GF_VECT_MUL_BASE gf_vect_mul_base
%define EC_ENCODE_DATA_BASE ec_encode_data_base
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
%define EC_ENCODE_DATA ec_encode_data
%define GF_VECT_MUL gf_vect_mul
%define GF_VECT_DOT_PROD gf_vect_dot_prod
%endif
%ifidn __OUTPUT_FORMAT__, elf32
[bits 32]
%define def_wrd dd
%define wrd_sz dword
%define arg1 esi
%else
%include "ec-reg-sizes.asm"
default rel
[bits 64]
%define def_wrd dq
%define wrd_sz qword
%define arg1 rsi
extern EC_ENCODE_DATA_SSE
extern EC_ENCODE_DATA_AVX
extern EC_ENCODE_DATA_AVX2
extern GF_VECT_MUL_SSE
extern GF_VECT_MUL_AVX
extern GF_VECT_DOT_PROD_SSE
extern GF_VECT_DOT_PROD_AVX
extern GF_VECT_DOT_PROD_AVX2
%endif
extern GF_VECT_MUL_BASE
extern EC_ENCODE_DATA_BASE
extern GF_VECT_DOT_PROD_BASE
section .data
;;; *_mbinit are the initial values of *_dispatched, which is updated on the first call.
;;; Therefore, *_dispatch_init is only executed on the first call.
ec_encode_data_dispatched:
def_wrd ec_encode_data_mbinit
gf_vect_mul_dispatched:
def_wrd gf_vect_mul_mbinit
gf_vect_dot_prod_dispatched:
def_wrd gf_vect_dot_prod_mbinit
section .text
;;;;
; ec_encode_data multibinary function
;;;;
global EC_ENCODE_DATA:function
ec_encode_data_mbinit:
call ec_encode_data_dispatch_init
EC_ENCODE_DATA:
jmp wrd_sz [ec_encode_data_dispatched]
ec_encode_data_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [EC_ENCODE_DATA_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [EC_ENCODE_DATA_BASE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
jne _done_ec_encode_data_init
mov rsi, rbx
;; Try for AVX2
xor ecx, ecx
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
xor ecx, ecx
xgetbv
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_ec_encode_data_init
lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
_done_ec_encode_data_init:
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [ec_encode_data_dispatched], arg1
pop arg1
ret
;;;;
; gf_vect_mul multibinary function
;;;;
global GF_VECT_MUL:function
gf_vect_mul_mbinit:
call gf_vect_mul_dispatch_init
GF_VECT_MUL:
jmp wrd_sz [gf_vect_mul_dispatched]
gf_vect_mul_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [GF_VECT_MUL_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
test ecx, FLAG_CPUID1_ECX_SSE4_2
lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
je _done_gf_vect_mul_dispatch_init
mov arg1, rbx
;; Try for AVX
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
cmp ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
jne _done_gf_vect_mul_dispatch_init
;; Does it have xmm and ymm support
xor ecx, ecx
xgetbv
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
jne _done_gf_vect_mul_dispatch_init
lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
_done_gf_vect_mul_dispatch_init:
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_mul_dispatched], arg1
pop arg1
ret
;;;;
; gf_vect_dot_prod multibinary function
;;;;
global GF_VECT_DOT_PROD:function
gf_vect_dot_prod_mbinit:
call gf_vect_dot_prod_dispatch_init
GF_VECT_DOT_PROD:
jmp wrd_sz [gf_vect_dot_prod_dispatched]
gf_vect_dot_prod_dispatch_init:
push arg1
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
lea arg1, [GF_VECT_DOT_PROD_BASE]
%else
push rax
push rbx
push rcx
push rdx
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
mov eax, 1
cpuid
lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
test ecx, FLAG_CPUID1_ECX_SSE4_1
cmovne arg1, rbx
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
jne _done_gf_vect_dot_prod_init
mov rsi, rbx
;; Try for AVX2
xor ecx, ecx
mov eax, 7
cpuid
test ebx, FLAG_CPUID1_EBX_AVX2
lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
cmovne rsi, rbx
;; Does it have xmm and ymm support
xor ecx, ecx
xgetbv
and eax, FLAG_XGETBV_EAX_XMM_YMM
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
je _done_gf_vect_dot_prod_init
lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
_done_gf_vect_dot_prod_init:
pop rdx
pop rcx
pop rbx
pop rax
%endif ;; END 32-bit check
mov [gf_vect_dot_prod_dispatched], arg1
pop arg1
ret
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion EC_ENCODE_DATA, 00, 02, 0133
slversion GF_VECT_MUL, 00, 02, 0134
slversion GF_VECT_DOT_PROD, 00, 01, 0138

View File

@ -1,96 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define EFLAGS_HAS_CPUID (1<<21)
%define FLAG_CPUID1_ECX_CLMUL (1<<1)
%define FLAG_CPUID1_EDX_SSE2 (1<<26)
%define FLAG_CPUID1_ECX_SSE3 (1)
%define FLAG_CPUID1_ECX_SSE4_1 (1<<19)
%define FLAG_CPUID1_ECX_SSE4_2 (1<<20)
%define FLAG_CPUID1_ECX_POPCNT (1<<23)
%define FLAG_CPUID1_ECX_AESNI (1<<25)
%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
%define FLAG_CPUID1_ECX_AVX (1<<28)
%define FLAG_CPUID1_EBX_AVX2 (1<<5)
%define FLAG_XGETBV_EAX_XMM_YMM 0x6
%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
; define d and w variants for registers
%define raxd eax
%define raxw ax
%define raxb al
%define rbxd ebx
%define rbxw bx
%define rbxb bl
%define rcxd ecx
%define rcxw cx
%define rcxb cl
%define rdxd edx
%define rdxw dx
%define rdxb dl
%define rsid esi
%define rsiw si
%define rsib sil
%define rdid edi
%define rdiw di
%define rdib dil
%define rbpd ebp
%define rbpw bp
%define rbpb bpl
%define ymm0x xmm0
%define ymm1x xmm1
%define ymm2x xmm2
%define ymm3x xmm3
%define ymm4x xmm4
%define ymm5x xmm5
%define ymm6x xmm6
%define ymm7x xmm7
%define ymm8x xmm8
%define ymm9x xmm9
%define ymm10x xmm10
%define ymm11x xmm11
%define ymm12x xmm12
%define ymm13x xmm13
%define ymm14x xmm14
%define ymm15x xmm15
%define DWORD(reg) reg %+ d
%define WORD(reg) reg %+ w
%define BYTE(reg) reg %+ b
%define XWORD(reg) reg %+ x

View File

@ -1,148 +0,0 @@
/**********************************************************************
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef _GF_VECT_MUL_H
#define _GF_VECT_MUL_H
/**
* @file gf-vect-mul.h
* @brief Interface to functions for vector (block) multiplication in GF(2^8).
*
* This file defines the interface to routines used in fast RAID rebuild and
* erasure codes.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief GF(2^8) vector multiply by constant.
*
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
* and partial write functions. Function requires pre-calculation of a
* 32-element constant array based on constant C. gftbl(C) = {C{00},
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
* and src must be aligned to 32B.
* @requires SSE4.1
* @param len Length of vector in bytes. Must be aligned to 32B.
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
* @param src Pointer to src data array. Must be aligned to 32B.
* @param dest Pointer to destination data array. Must be aligned to 32B.
* @returns 0 pass, other fail
*/
int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
/**
* @brief GF(2^8) vector multiply by constant.
*
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
* and partial write functions. Function requires pre-calculation of a
* 32-element constant array based on constant C. gftbl(C) = {C{00},
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
* and src must be aligned to 32B.
* @requires AVX
* @param len Length of vector in bytes. Must be aligned to 32B.
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
* @param src Pointer to src data array. Must be aligned to 32B.
* @param dest Pointer to destination data array. Must be aligned to 32B.
* @returns 0 pass, other fail
*/
int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
/**
* @brief GF(2^8) vector multiply by constant, runs appropriate version.
*
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
* and partial write functions. Function requires pre-calculation of a
* 32-element constant array based on constant C. gftbl(C) = {C{00},
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
* Len and src must be aligned to 32B.
*
* This function determines what instruction sets are enabled
* and selects the appropriate version at runtime.
*
* @param len Length of vector in bytes. Must be aligned to 32B.
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
* @param src Pointer to src data array. Must be aligned to 32B.
* @param dest Pointer to destination data array. Must be aligned to 32B.
* @returns 0 pass, other fail
*/
int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
/**
* @brief Initialize 32-byte constant array for GF(2^8) vector multiply
*
* Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
* C{20}, ... , C{f0} } as required by other fast vector multiply
* functions.
* @param c Constant input.
* @param gftbl Table output.
*/
void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
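/*
 * A hedged usage sketch combining the two calls above: expand a constant c
 * into its 32-byte lookup table, then multiply a buffer by it. The wrapper
 * name is illustrative; the 32B length and alignment requirements documented
 * above remain the caller's responsibility.
 */
static inline int mul_buffer_by_const(unsigned char c, void *src, void *dest,
				      int len)
{
	unsigned char gftbl[32];
	gf_vect_mul_init(c, gftbl);	/* build the table for constant c */
	return gf_vect_mul(len, gftbl, src, dest);
}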
/**
* @brief GF(2^8) vector multiply by constant, runs baseline version.
*
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
* and partial write functions. Function requires pre-calculation of a
* 32-element constant array based on constant C. gftbl(C) = {C{00},
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
* and src must be aligned to 32B.
*
* @param len Length of vector in bytes. Must be aligned to 32B.
* @param a Pointer to 32-byte array of pre-calculated constants based on C.
* Only the 2nd element is used.
* @param src Pointer to src data array. Must be aligned to 32B.
* @param dest Pointer to destination data array. Must be aligned to 32B.
*/
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
unsigned char *dest);
#ifdef __cplusplus
}
#endif
#endif //_GF_VECT_MUL_H

View File

@ -1,121 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
// #cgo CFLAGS: -O0
// #include <stdlib.h>
// #include "ec-code.h"
// #include "ec-common.h"
import "C"
import (
"errors"
"fmt"
"unsafe"
)
// Decode decodes erasure coded blocks of data into its original
// form. Erasure coded data contains K data blocks and M parity
// blocks. Decode can withstand the loss of up to any M blocks.
//
// "encodedDataBlocks" is an array of K data blocks and M parity
// blocks. Data blocks are position and order dependent. Missing blocks
// are set to "nil". There must be at least "K" number of data|parity
// blocks.
// "dataLen" is the length of original source data
func (e *Erasure) Decode(encodedDataBlocks [][]byte, dataLen int) (decodedData []byte, err error) {
var source, target **C.uint8_t
k := int(e.params.K)
m := int(e.params.M)
n := k + m
// We need the data and parity blocks preserved in the same order. Missing blocks are set to nil.
if len(encodedDataBlocks) != n {
msg := fmt.Sprintf("Encoded data blocks slice must be of length [%d]", n)
return nil, errors.New(msg)
}
// Length of a single encoded block
encodedBlockLen := GetEncodedBlockLen(dataLen, uint8(k))
// Keep track of errors per block.
missingEncodedBlocks := make([]int, n+1)
var missingEncodedBlocksCount int
// Check for the missing encoded blocks
for i := range encodedDataBlocks {
if encodedDataBlocks[i] == nil || len(encodedDataBlocks[i]) == 0 {
missingEncodedBlocks[missingEncodedBlocksCount] = i
missingEncodedBlocksCount++
}
}
missingEncodedBlocks[missingEncodedBlocksCount] = -1
missingEncodedBlocksCount++
// Cannot reconstruct original data when more than M data or parity blocks are missing.
if missingEncodedBlocksCount-1 > m {
return nil, fmt.Errorf("Cannot reconstruct original data, more than [%d] data or parity blocks are missing", m)
}
// Convert from Go int slice to C int array
missingEncodedBlocksC := intSlice2CIntArray(missingEncodedBlocks[:missingEncodedBlocksCount])
// Allocate buffer for the missing blocks
for i := range encodedDataBlocks {
if encodedDataBlocks[i] == nil || len(encodedDataBlocks[i]) == 0 {
encodedDataBlocks[i] = make([]byte, encodedBlockLen)
}
}
// If not already initialized, compute and cache the decode matrix, tables and index
if e.decodeMatrix == nil || e.decodeTbls == nil || e.decodeIndex == nil {
var decodeMatrix, decodeTbls *C.uint8_t
var decodeIndex *C.uint32_t
C.minio_init_decoder(missingEncodedBlocksC, C.int(k), C.int(n), C.int(missingEncodedBlocksCount-1),
e.encodeMatrix, &decodeMatrix, &decodeTbls, &decodeIndex)
// cache this for future needs
e.decodeMatrix = decodeMatrix
e.decodeTbls = decodeTbls
e.decodeIndex = decodeIndex
}
// Make a slice of pointers to encoded blocks. Necessary to bridge to the C world.
pointers := make([]*byte, n)
for i := range encodedDataBlocks {
pointers[i] = &encodedDataBlocks[i][0]
}
// Get pointers to source "data" and target "parity" blocks from the output byte array.
ret := C.minio_get_source_target(C.int(missingEncodedBlocksCount-1), C.int(k), C.int(m), missingEncodedBlocksC,
e.decodeIndex, (**C.uint8_t)(unsafe.Pointer(&pointers[0])), &source, &target)
if int(ret) == -1 {
return nil, errors.New("Unable to decode data")
}
// Decode data
C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(missingEncodedBlocksCount-1), e.decodeTbls,
source, target)
// Allocate the output buffer
decodedData = make([]byte, 0, encodedBlockLen*int(k))
for i := 0; i < int(k); i++ {
decodedData = append(decodedData, encodedDataBlocks[i]...)
}
return decodedData[:dataLen], nil
}

View File

@ -1,197 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
// #cgo CFLAGS: -O0
// #include <stdlib.h>
// #include "ec-code.h"
// #include "ec-common.h"
import "C"
import (
"errors"
"unsafe"
)
// Technique - type of matrix used in encoding
type Technique uint8
// Supported matrix types
const (
Vandermonde Technique = iota
Cauchy
None
)
// Default Data and Parity blocks
const (
K = 10
M = 3
)
// Block alignment
const (
SIMDAlign = 32
)
// ErasureParams is a configuration set for building an encoder. It is created using ValidateParams().
type ErasureParams struct {
K uint8
M uint8
Technique Technique // cauchy or vandermonde matrix (RS)
}
// Erasure is an object used to encode and decode data.
type Erasure struct {
params *ErasureParams
encodeMatrix, encodeTbls *C.uint8_t
decodeMatrix, decodeTbls *C.uint8_t
decodeIndex *C.uint32_t
}
// ValidateParams creates an ErasureParams object.
//
// k and m represent the matrix size (the number of data and parity blocks), which corresponds to the protection level.
// technique is the matrix type. Valid inputs are Cauchy (recommended) or Vandermonde.
//
func ValidateParams(k, m uint8, technique Technique) (*ErasureParams, error) {
if k < 1 {
return nil, errors.New("k cannot be zero")
}
if m < 1 {
return nil, errors.New("m cannot be zero")
}
if k+m > 255 {
return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1")
}
switch technique {
case Vandermonde:
break
case Cauchy:
break
default:
return nil, errors.New("Technique can be either vandermonde or cauchy")
}
return &ErasureParams{
K: k,
M: m,
Technique: technique,
}, nil
}
// NewErasure creates an encoder object with a given set of parameters.
func NewErasure(ep *ErasureParams) *Erasure {
var k = C.int(ep.K)
var m = C.int(ep.M)
var encodeMatrix *C.uint8_t
var encodeTbls *C.uint8_t
C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix,
&encodeTbls)
return &Erasure{
params: ep,
encodeMatrix: encodeMatrix,
encodeTbls: encodeTbls,
decodeMatrix: nil,
decodeTbls: nil,
decodeIndex: nil,
}
}
// GetEncodedBlocksLen - total length of all encoded blocks
func GetEncodedBlocksLen(inputLen int, k, m uint8) (outputLen int) {
outputLen = GetEncodedBlockLen(inputLen, k) * int(k+m)
return outputLen
}
// GetEncodedBlockLen - length per block of encoded blocks
func GetEncodedBlockLen(inputLen int, k uint8) (encodedOutputLen int) {
alignment := int(k) * SIMDAlign
remainder := inputLen % alignment
paddedInputLen := inputLen
if remainder != 0 {
paddedInputLen = inputLen + (alignment - remainder)
}
encodedOutputLen = paddedInputLen / int(k)
return encodedOutputLen
}
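// A worked example of the arithmetic above, with illustrative numbers only:
// for k = 10 the alignment unit is 10 * SIMDAlign = 320 bytes, so a 1000-byte
// input is padded to 1280 bytes and GetEncodedBlockLen returns 128.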
// Encode erasure codes a block of data into "k" data blocks and "m" parity blocks.
// Output is [k+m][]blocks of data and parity slices.
func (e *Erasure) Encode(inputData []byte) (encodedBlocks [][]byte, err error) {
k := int(e.params.K) // "k" data blocks
m := int(e.params.M) // "m" parity blocks
n := k + m // "n" total encoded blocks
// Length of a single encoded chunk.
// Total number of encoded chunks = "k" data + "m" parity blocks
encodedBlockLen := GetEncodedBlockLen(len(inputData), uint8(k))
// Length of total number of "k" data chunks
encodedDataBlocksLen := encodedBlockLen * k
// Length of extra padding required for the data blocks.
encodedDataBlocksPadLen := encodedDataBlocksLen - len(inputData)
// Extend inputData buffer to accommodate coded data blocks if necessary
if encodedDataBlocksPadLen > 0 {
padding := make([]byte, encodedDataBlocksPadLen)
// Expand with new padded blocks to the byte array
inputData = append(inputData, padding...)
}
// Extend inputData buffer to accommodate coded parity blocks
{ // Local Scope
encodedParityBlocksLen := encodedBlockLen * m
parityBlocks := make([]byte, encodedParityBlocksLen)
inputData = append(inputData, parityBlocks...)
}
// Allocate memory to the "encoded blocks" return buffer
encodedBlocks = make([][]byte, n) // Return buffer
// Necessary to bridge Go to the C world. C requires a 2D array of pointers to
// byte arrays. "encodedBlocks" is a 2D slice.
pointersToEncodedBlock := make([]*byte, n) // Pointers to encoded blocks.
// Copy data block slices to encoded block buffer
for i := 0; i < k; i++ {
encodedBlocks[i] = inputData[i*encodedBlockLen : (i+1)*encodedBlockLen]
pointersToEncodedBlock[i] = &encodedBlocks[i][0]
}
// Copy erasure block slices to encoded block buffer
for i := k; i < n; i++ {
encodedBlocks[i] = make([]byte, encodedBlockLen)
pointersToEncodedBlock[i] = &encodedBlocks[i][0]
}
// Erasure code the data into K data blocks and M parity
// blocks. Only the parity blocks are filled. Data blocks remain
// intact.
C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(m), e.encodeTbls,
(**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[:k][0])), // Pointers to data blocks
(**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[k:][0]))) // Pointers to parity blocks
return encodedBlocks, nil
}
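// A hedged round-trip sketch of the API above; "data" is an arbitrary byte
// slice, the choice of dropped blocks is illustrative, and error handling is
// elided for brevity.
//
//	params, _ := ValidateParams(10, 3, Cauchy)
//	e := NewErasure(params)
//	blocks, _ := e.Encode(data)                       // 13 blocks: 10 data + 3 parity
//	blocks[0], blocks[4], blocks[12] = nil, nil, nil  // lose up to M = 3 blocks
//	original, _ := e.Decode(blocks, len(data))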

View File

@ -1,25 +0,0 @@
// +build amd64
package erasure
//go:generate yasm -f macho64 ec-multibinary.asm -o ec-multibinary.syso
//go:generate yasm -f macho64 gf-2vect-dot-prod-avx2.asm -o gf-2vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-2vect-dot-prod-avx.asm -o gf-2vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-2vect-dot-prod-sse.asm -o gf-2vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-3vect-dot-prod-avx2.asm -o gf-3vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-3vect-dot-prod-avx.asm -o gf-3vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-3vect-dot-prod-sse.asm -o gf-3vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-4vect-dot-prod-avx2.asm -o gf-4vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-4vect-dot-prod-avx.asm -o gf-4vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-4vect-dot-prod-sse.asm -o gf-4vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-5vect-dot-prod-avx2.asm -o gf-5vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-5vect-dot-prod-avx.asm -o gf-5vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-5vect-dot-prod-sse.asm -o gf-5vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-6vect-dot-prod-avx2.asm -o gf-6vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-6vect-dot-prod-avx.asm -o gf-6vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-6vect-dot-prod-sse.asm -o gf-6vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-vect-dot-prod-avx2.asm -o gf-vect-dot-prod-avx2.syso
//go:generate yasm -f macho64 gf-vect-dot-prod-avx.asm -o gf-vect-dot-prod-avx.syso
//go:generate yasm -f macho64 gf-vect-dot-prod-sse.asm -o gf-vect-dot-prod-sse.syso
//go:generate yasm -f macho64 gf-vect-mul-avx.asm -o gf-vect-mul-avx.syso
//go:generate yasm -f macho64 gf-vect-mul-sse.asm -o gf-vect-mul-sse.syso

View File

@ -1,25 +0,0 @@
// +build amd64
package erasure
//go:generate yasm -f elf64 ec-multibinary.asm -o ec-multibinary.syso
//go:generate yasm -f elf64 gf-2vect-dot-prod-avx2.asm -o gf-2vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-2vect-dot-prod-avx.asm -o gf-2vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-2vect-dot-prod-sse.asm -o gf-2vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-3vect-dot-prod-avx2.asm -o gf-3vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-3vect-dot-prod-avx.asm -o gf-3vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-3vect-dot-prod-sse.asm -o gf-3vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-4vect-dot-prod-avx2.asm -o gf-4vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-4vect-dot-prod-avx.asm -o gf-4vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-4vect-dot-prod-sse.asm -o gf-4vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-5vect-dot-prod-avx2.asm -o gf-5vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-5vect-dot-prod-avx.asm -o gf-5vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-5vect-dot-prod-sse.asm -o gf-5vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-6vect-dot-prod-avx2.asm -o gf-6vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-6vect-dot-prod-avx.asm -o gf-6vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-6vect-dot-prod-sse.asm -o gf-6vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-vect-dot-prod-avx2.asm -o gf-vect-dot-prod-avx2.syso
//go:generate yasm -f elf64 gf-vect-dot-prod-avx.asm -o gf-vect-dot-prod-avx.syso
//go:generate yasm -f elf64 gf-vect-dot-prod-sse.asm -o gf-vect-dot-prod-sse.syso
//go:generate yasm -f elf64 gf-vect-mul-avx.asm -o gf-vect-mul-avx.syso
//go:generate yasm -f elf64 gf-vect-mul-sse.asm -o gf-vect-mul-sse.syso

View File

@ -1,25 +0,0 @@
// +build amd64
package erasure
//go:generate yasm -f win64 ec-multibinary.asm -o ec-multibinary.syso
//go:generate yasm -f win64 gf-2vect-dot-prod-avx2.asm -o gf-2vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-2vect-dot-prod-avx.asm -o gf-2vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-2vect-dot-prod-sse.asm -o gf-2vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-3vect-dot-prod-avx2.asm -o gf-3vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-3vect-dot-prod-avx.asm -o gf-3vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-3vect-dot-prod-sse.asm -o gf-3vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-4vect-dot-prod-avx2.asm -o gf-4vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-4vect-dot-prod-avx.asm -o gf-4vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-4vect-dot-prod-sse.asm -o gf-4vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-5vect-dot-prod-avx2.asm -o gf-5vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-5vect-dot-prod-avx.asm -o gf-5vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-5vect-dot-prod-sse.asm -o gf-5vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-6vect-dot-prod-avx2.asm -o gf-6vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-6vect-dot-prod-avx.asm -o gf-6vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-6vect-dot-prod-sse.asm -o gf-6vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-vect-dot-prod-avx2.asm -o gf-vect-dot-prod-avx2.syso
//go:generate yasm -f win64 gf-vect-dot-prod-avx.asm -o gf-vect-dot-prod-avx.syso
//go:generate yasm -f win64 gf-vect-dot-prod-sse.asm -o gf-vect-dot-prod-sse.syso
//go:generate yasm -f win64 gf-vect-mul-avx.asm -o gf-vect-mul-avx.syso
//go:generate yasm -f win64 gf-vect-mul-sse.asm -o gf-vect-mul-sse.syso

View File

@ -1,263 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
%else
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_reg r12, 3*16 + 0*8
save_reg r13, 3*16 + 1*8
save_reg r14, 3*16 + 2*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
mov r12, [rsp + 3*16 + 0*8]
mov r13, [rsp + 3*16 + 1*8]
mov r14, [rsp + 3*16 + 2*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define vec_i tmp2
%define ptr tmp3
%define dest2 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm8
%define xgft1_lo xmm7
%define xgft1_hi xmm6
%define xgft2_lo xmm5
%define xgft2_hi xmm4
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
align 16
global GF_2VECT_DOT_PROD_AVX:function
func(GF_2VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest1, [dest1]
.loop16
vpxor xp1, xp1
vpxor xp2, xp2
mov tmp, mul_array
xor vec_i, vec_i
.next_vect
mov ptr, [src+vec_i]
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, PS
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191

View File

@ -1,277 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
%else
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
save_reg r12, 3*16 + 0*8
save_reg r13, 3*16 + 1*8
save_reg r14, 3*16 + 2*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
mov r12, [rsp + 3*16 + 0*8]
mov r13, [rsp + 3*16 + 1*8]
mov r14, [rsp + 3*16 + 2*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define vec_i tmp2
%define ptr tmp3
%define dest2 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm8
%define xmask0fx xmm8
%define xgft1_lo ymm7
%define xgft1_hi ymm6
%define xgft2_lo ymm5
%define xgft2_hi ymm4
%define x0 ymm0
%define xtmpa ymm1
%define xp1 ymm2
%define xp2 ymm3
align 16
global GF_2VECT_DOT_PROD_AVX2:function
func(GF_2VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest1, [dest1]
.loop32:
vpxor xp1, xp1
vpxor xp2, xp2
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
; " Ax{00}, Ax{10}, ..., Ax{f0}
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
; " Bx{00}, Bx{10}, ..., Bx{f0}
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, PS
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196


@ -1,265 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
%else
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
%endmacro
%macro FUNC_RESTORE 0
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_reg r12, 3*16 + 0*8
save_reg r13, 3*16 + 1*8
save_reg r14, 3*16 + 2*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
mov r12, [rsp + 3*16 + 0*8]
mov r13, [rsp + 3*16 + 1*8]
mov r14, [rsp + 3*16 + 2*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define vec_i tmp2
%define ptr tmp3
%define dest2 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm8
%define xgft1_lo xmm7
%define xgft1_hi xmm6
%define xgft2_lo xmm5
%define xgft2_hi xmm4
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
align 16
global GF_2VECT_DOT_PROD_SSE:function
func(GF_2VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest1, [dest1]
.loop16:
pxor xp1, xp1
pxor xp2, xp2
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, PS
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp1, xgft1_hi ;xp1 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp2, xgft2_hi ;xp2 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062


@ -1,290 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
%else
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_reg r12, 6*16 + 0*8
save_reg r13, 6*16 + 1*8
save_reg r14, 6*16 + 2*8
save_reg r15, 6*16 + 3*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
mov r12, [rsp + 6*16 + 0*8]
mov r13, [rsp + 6*16 + 1*8]
mov r14, [rsp + 6*16 + 2*8]
mov r15, [rsp + 6*16 + 3*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm11
%define xgft1_lo xmm10
%define xgft1_hi xmm9
%define xgft2_lo xmm8
%define xgft2_hi xmm7
%define xgft3_lo xmm6
%define xgft3_hi xmm5
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
align 16
global GF_3VECT_DOT_PROD_AVX:function
func(GF_3VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest1, [dest1]
.loop16:
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
add tmp, 32
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192


@ -1,305 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
%else
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
save_reg r12, 6*16 + 0*8
save_reg r13, 6*16 + 1*8
save_reg r14, 6*16 + 2*8
save_reg r15, 6*16 + 3*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
mov r12, [rsp + 6*16 + 0*8]
mov r13, [rsp + 6*16 + 1*8]
mov r14, [rsp + 6*16 + 2*8]
mov r15, [rsp + 6*16 + 3*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm11
%define xmask0fx xmm11
%define xgft1_lo ymm10
%define xgft1_hi ymm9
%define xgft2_lo ymm8
%define xgft2_hi ymm7
%define xgft3_lo ymm6
%define xgft3_hi ymm5
%define x0 ymm0
%define xtmpa ymm1
%define xp1 ymm2
%define xp2 ymm3
%define xp3 ymm4
align 16
global GF_3VECT_DOT_PROD_AVX2:function
func(GF_3VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest1, [dest1]
.loop32:
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
; " Ax{00}, Ax{10}, ..., Ax{f0}
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
; " Bx{00}, Bx{10}, ..., Bx{f0}
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
; " Cx{00}, Cx{10}, ..., Cx{f0}
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
add tmp, 32
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197


@ -1,291 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
%else
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
%endmacro
%macro FUNC_RESTORE 0
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_reg r12, 6*16 + 0*8
save_reg r13, 6*16 + 1*8
save_reg r14, 6*16 + 2*8
save_reg r15, 6*16 + 3*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
mov r12, [rsp + 6*16 + 0*8]
mov r13, [rsp + 6*16 + 1*8]
mov r14, [rsp + 6*16 + 2*8]
mov r15, [rsp + 6*16 + 3*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm11
%define xgft1_lo xmm10
%define xgft1_hi xmm9
%define xgft2_lo xmm8
%define xgft2_hi xmm7
%define xgft3_lo xmm6
%define xgft3_hi xmm5
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
align 16
global GF_3VECT_DOT_PROD_SSE:function
func(GF_3VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest1, [dest1]
.loop16:
pxor xp1, xp1
pxor xp2, xp2
pxor xp3, xp3
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
add tmp, 32
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp1, xgft1_hi ;xp1 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp2, xgft2_hi ;xp2 += partial
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
pxor xp3, xgft3_hi ;xp3 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063


@ -1,334 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
%else
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_reg r12, 9*16 + 0*8
save_reg r13, 9*16 + 1*8
save_reg r14, 9*16 + 2*8
save_reg r15, 9*16 + 3*8
save_reg rdi, 9*16 + 4*8
save_reg rsi, 9*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
mov r12, [rsp + 9*16 + 0*8]
mov r13, [rsp + 9*16 + 1*8]
mov r14, [rsp + 9*16 + 2*8]
mov r15, [rsp + 9*16 + 3*8]
mov rdi, [rsp + 9*16 + 4*8]
mov rsi, [rsp + 9*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define dest4 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm14
%define xgft1_lo xmm13
%define xgft1_hi xmm12
%define xgft2_lo xmm11
%define xgft2_hi xmm10
%define xgft3_lo xmm9
%define xgft3_hi xmm8
%define xgft4_lo xmm7
%define xgft4_hi xmm6
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
align 16
global GF_4VECT_DOT_PROD_AVX:function
func(GF_4VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest4, [dest1+3*PS]
mov dest1, [dest1]
.loop16:
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, PS
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
vpxor xp4, xgft4_hi ;xp4 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
XSTR [dest4+pos], xp4
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064


@ -1,345 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
%else
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
vmovdqa [rsp + 6*16], xmm12
vmovdqa [rsp + 7*16], xmm13
vmovdqa [rsp + 8*16], xmm14
save_reg r12, 9*16 + 0*8
save_reg r13, 9*16 + 1*8
save_reg r14, 9*16 + 2*8
save_reg r15, 9*16 + 3*8
save_reg rdi, 9*16 + 4*8
save_reg rsi, 9*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
mov r12, [rsp + 9*16 + 0*8]
mov r13, [rsp + 9*16 + 1*8]
mov r14, [rsp + 9*16 + 2*8]
mov r15, [rsp + 9*16 + 3*8]
mov rdi, [rsp + 9*16 + 4*8]
mov rsi, [rsp + 9*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define dest4 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm14
%define xmask0fx xmm14
%define xgft1_lo ymm13
%define xgft1_hi ymm12
%define xgft2_lo ymm11
%define xgft2_hi ymm10
%define xgft3_lo ymm9
%define xgft3_hi ymm8
%define xgft4_lo ymm7
%define xgft4_hi ymm6
%define x0 ymm0
%define xtmpa ymm1
%define xp1 ymm2
%define xp2 ymm3
%define xp3 ymm4
%define xp4 ymm5
align 16
global GF_4VECT_DOT_PROD_AVX2:function
func(GF_4VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest4, [dest1+3*PS]
mov dest1, [dest1]
.loop32:
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
XLDR x0, [ptr+pos] ;Get next source vector
add vec_i, PS
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
; " Cx{00}, Cx{10}, ..., Cx{f0}
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
; " Dx{00}, Dx{10}, ..., Dx{f0}
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
add tmp, 32
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
vpxor xp4, xgft4_hi ;xp4 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
XSTR [dest4+pos], xp4
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064


@ -1,334 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
%else
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_reg r12, 9*16 + 0*8
save_reg r13, 9*16 + 1*8
save_reg r14, 9*16 + 2*8
save_reg r15, 9*16 + 3*8
save_reg rdi, 9*16 + 4*8
save_reg rsi, 9*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm12, [rsp + 6*16]
movdqa xmm13, [rsp + 7*16]
movdqa xmm14, [rsp + 8*16]
mov r12, [rsp + 9*16 + 0*8]
mov r13, [rsp + 9*16 + 1*8]
mov r14, [rsp + 9*16 + 2*8]
mov r15, [rsp + 9*16 + 3*8]
mov rdi, [rsp + 9*16 + 4*8]
mov rsi, [rsp + 9*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest1 arg4
%define ptr arg5
%define vec_i tmp2
%define dest2 tmp3
%define dest3 tmp4
%define dest4 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm14
%define xgft1_lo xmm13
%define xgft1_hi xmm12
%define xgft2_lo xmm11
%define xgft2_hi xmm10
%define xgft3_lo xmm9
%define xgft3_hi xmm8
%define xgft4_lo xmm7
%define xgft4_hi xmm6
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
align 16
global GF_4VECT_DOT_PROD_SSE:function
func(GF_4VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest2, [dest1+PS]
mov dest3, [dest1+2*PS]
mov dest4, [dest1+3*PS]
mov dest1, [dest1]
.loop16:
pxor xp1, xp1
pxor xp2, xp2
pxor xp3, xp3
pxor xp4, xp4
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i]
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, PS
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp1, xgft1_hi ;xp1 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp2, xgft2_hi ;xp2 += partial
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
pxor xp3, xgft3_hi ;xp3 += partial
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
pxor xp4, xgft4_hi ;xp4 += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [dest3+pos], xp3
XSTR [dest4+pos], xp4
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064
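What the deleted kernels above do, in scalar terms: each xgft*_lo/xgft*_hi register pair holds the 16 products of one erasure coefficient with every low nibble and every high nibble, so a pshufb per nibble followed by a pxor performs sixteen GF(2^8) multiplies per 128-bit lane. A minimal Go sketch of that nibble-table technique follows; the function names are illustrative only (not part of the removed package's API) and it assumes the usual 0x11d reduction polynomial.

package main

import "fmt"

// gfMul is a bitwise GF(2^8) multiply, assuming the 0x11d reduction
// polynomial these tables are normally built with.
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		carry := a & 0x80
		a <<= 1
		if carry != 0 {
			a ^= 0x1d // reduce modulo x^8+x^4+x^3+x^2+1
		}
		b >>= 1
	}
	return p
}

// nibbleTables builds the two 16-entry tables an xgft*_lo/xgft*_hi pair
// holds: products of coefficient c with every low and every high nibble.
func nibbleTables(c byte) (lo, hi [16]byte) {
	for i := 0; i < 16; i++ {
		lo[i] = gfMul(c, byte(i))
		hi[i] = gfMul(c, byte(i)<<4)
	}
	return
}

// mulByTables is the scalar equivalent of one pshufb/pshufb/pxor step:
// look up each nibble of b and XOR the two partial products.
func mulByTables(lo, hi [16]byte, b byte) byte {
	return lo[b&0x0f] ^ hi[b>>4]
}

func main() {
	lo, hi := nibbleTables(0x57)
	for _, b := range []byte{0x00, 0x01, 0x13, 0xfe} {
		fmt.Printf("0x57*%#x: tables=%#x direct=%#x\n", b, mulByTables(lo, hi, b), gfMul(0x57, b))
	}
}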

View File

@ -1,349 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
%else
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_xmm128 xmm15, 9*16
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
vmovdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft1_lo xmm14
%define xgft1_hi xmm13
%define xgft2_lo xmm12
%define xgft2_hi xmm11
%define xgft3_lo xmm10
%define xgft3_hi xmm9
%define xgft4_lo xmm8
%define xgft4_hi xmm7
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
%define xp5 xmm6
align 16
global gf_5vect_dot_prod_avx:function
func(gf_5vect_dot_prod_avx)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop16:
mov tmp, mul_array
xor vec_i, vec_i
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
vpxor xp5, xp5
.next_vect:
mov ptr, [src+vec_i]
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
add tmp, 32
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
vpxor xp4, xgft4_hi ;xp4 += partial
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp5, xgft1_hi ;xp5 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_5vect_dot_prod_avx, 02, 03, 0194

View File

@ -1,363 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
%else
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
vmovdqa [rsp + 6*16], xmm12
vmovdqa [rsp + 7*16], xmm13
vmovdqa [rsp + 8*16], xmm14
vmovdqa [rsp + 9*16], xmm15
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
vmovdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm15
%define xmask0fx xmm15
%define xgft1_lo ymm14
%define xgft1_hi ymm13
%define xgft2_lo ymm12
%define xgft2_hi ymm11
%define xgft3_lo ymm10
%define xgft3_hi ymm9
%define xgft4_lo ymm8
%define xgft4_hi ymm7
%define x0 ymm0
%define xtmpa ymm1
%define xp1 ymm2
%define xp2 ymm3
%define xp3 ymm4
%define xp4 ymm5
%define xp5 ymm6
align 16
global GF_5VECT_DOT_PROD_AVX2:function
func(GF_5VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop32:
mov tmp, mul_array
xor vec_i, vec_i
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
vpxor xp5, xp5
.next_vect:
mov ptr, [src+vec_i]
XLDR x0, [ptr+pos] ;Get next source vector
add vec_i, PS
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
; " Cx{00}, Cx{10}, ..., Cx{f0}
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
; " Dx{00}, Dx{10}, ..., Dx{f0}
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
; " Ex{00}, Ex{10}, ..., Ex{f0}
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
add tmp, 32
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
vpxor xp4, xgft4_hi ;xp4 += partial
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp5, xgft1_hi ;xp5 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199
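The AVX2 variant above keeps the same scheme but widens it to 32-byte blocks: the 0x0f mask is built in a register with vpinsrb/vpbroadcastb instead of a memory constant, each 16-byte nibble table is mirrored across both 128-bit lanes with vperm2i128, and a length that is not a multiple of the block is handled with one extra pass over an overlapping block at offset length-32 rather than a scalar tail loop. A short Go sketch of that chunk-and-overlap loop structure (helper names are illustrative, not from the removed package):

package main

import "fmt"

// processChunks mirrors the loop shape of the kernels above: walk the
// buffer in fixed-size blocks and, when the length is not a multiple of
// the block, redo one final block that overlaps the previous one at
// offset len-block (the overlapped bytes are simply recomputed to the
// same values). Requires len(buf) >= block, matching the "jl .return_fail"
// guard.
func processChunks(buf []byte, block int, f func(off int)) error {
	if len(buf) < block {
		return fmt.Errorf("buffer shorter than one block")
	}
	end := len(buf) - block // highest valid block offset ("sub len, 32")
	pos := 0
	for ; pos <= end; pos += block { // ".loop32" / "jle .loop32"
		f(pos)
	}
	if pos != end+block { // length was not a multiple of the block size
		f(end) // overlapped tail pass: "mov pos, len; jmp .loop32"
	}
	return nil
}

func main() {
	var offsets []int
	_ = processChunks(make([]byte, 70), 32, func(off int) { offsets = append(offsets, off) })
	fmt.Println(offsets) // [0 32 38]
}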

View File

@ -1,350 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
%else
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_xmm128 xmm15, 9*16
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm12, [rsp + 6*16]
movdqa xmm13, [rsp + 7*16]
movdqa xmm14, [rsp + 8*16]
movdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft1_lo xmm14
%define xgft1_hi xmm13
%define xgft2_lo xmm12
%define xgft2_hi xmm11
%define xgft3_lo xmm10
%define xgft3_hi xmm9
%define xgft4_lo xmm8
%define xgft4_hi xmm7
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
%define xp5 xmm6
align 16
global GF_5VECT_DOT_PROD_SSE:function
func(GF_5VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop16:
mov tmp, mul_array
xor vec_i, vec_i
pxor xp1, xp1
pxor xp2, xp2
pxor xp3, xp3
pxor xp4, xp4
pxor xp5, xp5
.next_vect:
mov ptr, [src+vec_i]
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp1, xgft1_hi ;xp1 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp2, xgft2_hi ;xp2 += partial
movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
add tmp, 32
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
pxor xp3, xgft3_hi ;xp3 += partial
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
pxor xp4, xgft4_hi ;xp4 += partial
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp5, xgft1_hi ;xp5 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065

View File

@ -1,361 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
%else
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_xmm128 xmm15, 9*16
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
vmovdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft1_lo xmm14
%define xgft1_hi xmm13
%define xgft2_lo xmm12
%define xgft2_hi xmm11
%define xgft3_lo xmm10
%define xgft3_hi xmm9
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
%define xp5 xmm6
%define xp6 xmm7
align 16
global GF_6VECT_DOT_PROD_AVX:function
func(GF_6VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop16:
mov tmp, mul_array
xor vec_i, vec_i
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
vpxor xp5, xp5
vpxor xp6, xp6
.next_vect:
mov ptr, [src+vec_i]
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
add tmp, 32
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp4, xgft1_hi ;xp4 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp5, xgft2_hi ;xp5 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp6, xgft3_hi ;xp6 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
mov tmp, [dest+5*PS]
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
XSTR [tmp+pos], xp6
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195

View File

@ -1,374 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
%else
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
vmovdqa [rsp + 0*16], xmm6
vmovdqa [rsp + 1*16], xmm7
vmovdqa [rsp + 2*16], xmm8
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
vmovdqa [rsp + 6*16], xmm12
vmovdqa [rsp + 7*16], xmm13
vmovdqa [rsp + 8*16], xmm14
vmovdqa [rsp + 9*16], xmm15
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm8, [rsp + 2*16]
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
vmovdqa xmm12, [rsp + 6*16]
vmovdqa xmm13, [rsp + 7*16]
vmovdqa xmm14, [rsp + 8*16]
vmovdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm15
%define xmask0fx xmm15
%define xgft1_lo ymm14
%define xgft1_hi ymm13
%define xgft2_lo ymm12
%define xgft2_hi ymm11
%define xgft3_lo ymm10
%define xgft3_hi ymm9
%define x0 ymm0
%define xtmpa ymm1
%define xp1 ymm2
%define xp2 ymm3
%define xp3 ymm4
%define xp4 ymm5
%define xp5 ymm6
%define xp6 ymm7
align 16
global gf_6vect_dot_prod_avx2:function
func(gf_6vect_dot_prod_avx2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop32:
mov tmp, mul_array
xor vec_i, vec_i
vpxor xp1, xp1
vpxor xp2, xp2
vpxor xp3, xp3
vpxor xp4, xp4
vpxor xp5, xp5
vpxor xp6, xp6
.next_vect:
mov ptr, [src+vec_i]
XLDR x0, [ptr+pos] ;Get next source vector
add vec_i, PS
vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
; " Ax{00}, Ax{10}, ..., Ax{f0}
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
; " Bx{00}, Bx{10}, ..., Bx{f0}
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
; " Cx{00}, Cx{10}, ..., Cx{f0}
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp1, xgft1_hi ;xp1 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp2, xgft2_hi ;xp2 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp3, xgft3_hi ;xp3 += partial
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
; " Dx{00}, Dx{10}, ..., Dx{f0}
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
; " Ex{00}, Ex{10}, ..., Ex{f0}
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
; " Fx{00}, Fx{10}, ..., Fx{f0}
add tmp, 32
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
vpxor xp4, xgft1_hi ;xp4 += partial
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
vpxor xp5, xgft2_hi ;xp5 += partial
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
vpxor xp6, xgft3_hi ;xp6 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
mov tmp, [dest+5*PS]
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
XSTR [tmp+pos], xp6
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion gf_6vect_dot_prod_avx2, 04, 03, 019a

View File

@ -1,361 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
%else
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
%define tmp5 r14 ; must be saved and restored
%define tmp6 r15 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define func(x) x:
%macro FUNC_SAVE 0
push r12
push r13
push r14
push r15
%endmacro
%macro FUNC_RESTORE 0
pop r15
pop r14
pop r13
pop r12
%endmacro
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define tmp5 rdi ; must be saved and restored
%define tmp6 rsi ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm8, 2*16
save_xmm128 xmm9, 3*16
save_xmm128 xmm10, 4*16
save_xmm128 xmm11, 5*16
save_xmm128 xmm12, 6*16
save_xmm128 xmm13, 7*16
save_xmm128 xmm14, 8*16
save_xmm128 xmm15, 9*16
save_reg r12, 10*16 + 0*8
save_reg r13, 10*16 + 1*8
save_reg r14, 10*16 + 2*8
save_reg r15, 10*16 + 3*8
save_reg rdi, 10*16 + 4*8
save_reg rsi, 10*16 + 5*8
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm8, [rsp + 2*16]
movdqa xmm9, [rsp + 3*16]
movdqa xmm10, [rsp + 4*16]
movdqa xmm11, [rsp + 5*16]
movdqa xmm12, [rsp + 6*16]
movdqa xmm13, [rsp + 7*16]
movdqa xmm14, [rsp + 8*16]
movdqa xmm15, [rsp + 9*16]
mov r12, [rsp + 10*16 + 0*8]
mov r13, [rsp + 10*16 + 1*8]
mov r14, [rsp + 10*16 + 2*8]
mov r15, [rsp + 10*16 + 3*8]
mov rdi, [rsp + 10*16 + 4*8]
mov rsi, [rsp + 10*16 + 5*8]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define ptr arg5
%define vec_i tmp2
%define dest1 tmp3
%define dest2 tmp4
%define vskip1 tmp5
%define vskip3 tmp6
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/store
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft1_lo xmm14
%define xgft1_hi xmm13
%define xgft2_lo xmm12
%define xgft2_hi xmm11
%define xgft3_lo xmm10
%define xgft3_hi xmm9
%define x0 xmm0
%define xtmpa xmm1
%define xp1 xmm2
%define xp2 xmm3
%define xp3 xmm4
%define xp4 xmm5
%define xp5 xmm6
%define xp6 xmm7
align 16
global GF_6VECT_DOT_PROD_SSE:function
func(GF_6VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
mov vskip1, vec
imul vskip1, 32
mov vskip3, vec
imul vskip3, 96
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
mov dest1, [dest]
mov dest2, [dest+PS]
.loop16:
mov tmp, mul_array
xor vec_i, vec_i
pxor xp1, xp1
pxor xp2, xp2
pxor xp3, xp3
pxor xp4, xp4
pxor xp5, xp5
pxor xp6, xp6
.next_vect:
mov ptr, [src+vec_i]
add vec_i, PS
XLDR x0, [ptr+pos] ;Get next source vector
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp1, xgft1_hi ;xp1 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp2, xgft2_hi ;xp2 += partial
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
pxor xp3, xgft3_hi ;xp3 += partial
movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
movdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
add tmp, 32
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
pxor xp4, xgft1_hi ;xp4 += partial
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
pxor xp5, xgft2_hi ;xp5 += partial
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
pxor xp6, xgft3_hi ;xp6 += partial
cmp vec_i, vec
jl .next_vect
mov tmp, [dest+2*PS]
mov ptr, [dest+3*PS]
mov vec_i, [dest+4*PS]
XSTR [dest1+pos], xp1
XSTR [dest2+pos], xp2
XSTR [tmp+pos], xp3
mov tmp, [dest+5*PS]
XSTR [ptr+pos], xp4
XSTR [vec_i+pos], xp5
XSTR [tmp+pos], xp6
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
FUNC_RESTORE
mov return, 0
ret
.return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066
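Taken together, the 4-, 5- and 6-vect kernels above compute the same operation at different unroll widths: byte i of parity output p is the GF(2^8) dot product of byte i of every source buffer with row p of the coefficient matrix, and vskip1 = vec*32 / vskip3 = vec*96 are simply strides into the 32-bytes-per-coefficient table block. A scalar Go sketch of that structure (names illustrative; the multiply is passed in because the table layout is an implementation detail):

package main

import "fmt"

// dotProdRef is a scalar reference for what the gf_Nvect_dot_prod kernels
// compute: byte i of parity output p is the GF(2^8) dot product of byte i
// of every source with row p of the coefficient matrix. The multiply is a
// parameter so this sketch stays independent of the table representation
// (the assembly expands each coefficient into 32 bytes of nibble tables).
func dotProdRef(mul func(c, b byte) byte, coef, srcs, dests [][]byte) {
	for p := range dests {
		for i := range dests[p] {
			var acc byte
			for s := range srcs {
				acc ^= mul(coef[p][s], srcs[s][i]) // the pxor of each pshufb partial
			}
			dests[p][i] = acc
		}
	}
}

func main() {
	// Identity-or-zero multiply, enough to exercise the loop structure.
	mul := func(c, b byte) byte {
		if c == 0 {
			return 0
		}
		return b
	}
	srcs := [][]byte{{1, 2, 3}, {4, 5, 6}}
	dests := [][]byte{make([]byte, 3)}
	coef := [][]byte{{1, 1}}
	dotProdRef(mul, coef, srcs, dests)
	fmt.Println(dests[0]) // [5 7 5]
}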

View File

@ -1,221 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
%else
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved and loaded
%define tmp r11
%define tmp2 r10
%define tmp3 rdi ; must be saved and loaded
%define return rax
%define PS 8
%define frame_size 2*8
%define arg(x) [rsp + frame_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
rex_push_reg r12
push_reg rdi
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
pop rdi
pop r12
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define vec_i tmp2
%define ptr tmp3
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm5
%define xgft_lo xmm4
%define xgft_hi xmm3
%define x0 xmm0
%define xtmpa xmm1
%define xp xmm2
align 16
global GF_VECT_DOT_PROD_AVX:function
func(GF_VECT_DOT_PROD_AVX)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
.loop16:
vpxor xp, xp
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i*PS]
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
vmovdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, 1
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
vpxor xp, xp, xgft_hi ;xp += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest+pos], xp
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
poly:
mask0f:
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061
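The AVX routine above applies the same nibble-lookup multiply across many source buffers and accumulates with XOR. Here is a scalar model of the gf_vect_dot_prod(len, vec, *g_tbls, **buffs, *dest) contract, with plain coefficients standing in for the 32-byte table pairs; gfMul and its 0x1d reduction are assumptions, as before.

```go
package main

import "fmt"

// gfMul: bitwise GF(2^8) multiply (assumed 0x1d reduction polynomial).
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		hiBit := a & 0x80
		a <<= 1
		if hiBit != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// gfVectDotProd models gf_vect_dot_prod: each destination byte is the GF sum,
// over all source buffers, of coefficient j times buffs[j][i]. coeffs[j]
// stands in for the j-th 32-byte table pair in g_tbls.
func gfVectDotProd(coeffs []byte, buffs [][]byte, dest []byte) {
	for i := range dest {
		var acc byte
		for j, c := range coeffs {
			acc ^= gfMul(c, buffs[j][i])
		}
		dest[i] = acc
	}
}

func main() {
	buffs := [][]byte{
		{0x01, 0x02, 0x03, 0x04},
		{0x10, 0x20, 0x30, 0x40},
	}
	dest := make([]byte, 4)
	gfVectDotProd([]byte{0x02, 0x03}, buffs, dest)
	fmt.Printf("% x\n", dest)
}
```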


@ -1,228 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
%else
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved and loaded
%define tmp r11
%define tmp.w r11d
%define tmp.b r11b
%define tmp2 r10
%define tmp3 rdi ; must be saved and loaded
%define return rax
%define PS 8
%define frame_size 2*8
%define arg(x) [rsp + frame_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
rex_push_reg r12
push_reg rdi
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
pop rdi
pop r12
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define vec_i tmp2
%define ptr tmp3
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR vmovdqu
%define XSTR vmovdqu
%else
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f ymm3
%define xmask0fx xmm3
%define xgft_lo ymm4
%define xgft_hi ymm5
%define x0 ymm0
%define xtmpa ymm1
%define xp ymm2
align 16
global GF_VECT_DOT_PROD_AVX2:function
func(GF_VECT_DOT_PROD_AVX2)
FUNC_SAVE
sub len, 32
jl .return_fail
xor pos, pos
mov tmp.b, 0x0f
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
.loop32:
vpxor xp, xp
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i*PS]
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, 1
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
vpxor xp, xp, xgft_hi ;xp += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest+pos], xp
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jle .loop32
lea tmp, [len + 32]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-32
jmp .loop32 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190
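The table comments above (Cx{00}..Cx{0f}, then Cx{00},Cx{10}..Cx{f0}) pin down how g_tbls is laid out: 32 bytes per coefficient, the 16-byte low-nibble table followed by the 16-byte high-nibble table. The AVX2 loop reads that pair with one vmovdqu and then duplicates each half across both 128-bit lanes via vperm2i128 (0x00 for lo|lo, 0x11 for hi|hi). A small Go sketch of the layout, with the same 0x1d assumption:

```go
package main

import "fmt"

// gfMul: bitwise GF(2^8) multiply (assumed 0x1d reduction polynomial).
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		hiBit := a & 0x80
		a <<= 1
		if hiBit != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// buildGfTbls lays coefficients out the way these kernels read them:
// 32 bytes per coefficient, low-nibble products first, high-nibble products second.
func buildGfTbls(coeffs []byte) []byte {
	tbls := make([]byte, 0, 32*len(coeffs))
	for _, c := range coeffs {
		for n := byte(0); n < 16; n++ {
			tbls = append(tbls, gfMul(c, n)) // Cx{00}..Cx{0f}
		}
		for n := byte(0); n < 16; n++ {
			tbls = append(tbls, gfMul(c, n<<4)) // Cx{00},Cx{10}..Cx{f0}
		}
	}
	return tbls
}

func main() {
	fmt.Printf("% x\n", buildGfTbls([]byte{0x02}))
}
```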


@ -1,217 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
%else
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define tmp r11
%define tmp2 r10
%define tmp3 r9
%define return rax
%define PS 8
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%endif
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define arg4 r12 ; must be saved and loaded
%define tmp r11
%define tmp2 r10
%define tmp3 rdi ; must be saved and loaded
%define return rax
%define PS 8
%define frame_size 2*8
%define arg(x) [rsp + frame_size + PS + PS*x]
%define func(x) proc_frame x
%macro FUNC_SAVE 0
rex_push_reg r12
push_reg rdi
end_prolog
mov arg4, arg(4)
%endmacro
%macro FUNC_RESTORE 0
pop rdi
pop r12
%endmacro
%endif
%define len arg0
%define vec arg1
%define mul_array arg2
%define src arg3
%define dest arg4
%define vec_i tmp2
%define ptr tmp3
%define pos return
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
%define XLDR movdqu
%define XSTR movdqu
%else
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm5
%define xgft_lo xmm4
%define xgft_hi xmm3
%define x0 xmm0
%define xtmpa xmm1
%define xp xmm2
align 16
global GF_VECT_DOT_PROD_SSE:function
func(GF_VECT_DOT_PROD_SSE)
FUNC_SAVE
sub len, 16
jl .return_fail
xor pos, pos
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
.loop16:
pxor xp, xp
mov tmp, mul_array
xor vec_i, vec_i
.next_vect:
mov ptr, [src+vec_i*PS]
movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
XLDR x0, [ptr+pos] ;Get next source vector
add tmp, 32
add vec_i, 1
movdqa xtmpa, x0 ;Keep unshifted copy of src
psraw x0, 4 ;Shift to put high nibble into bits 4-0
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0
pshufb xgft_hi, x0 ;Lookup mul table of high nibble
pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble
pxor xgft_hi, xgft_lo ;GF add high and low partials
pxor xp, xgft_hi ;xp += partial
cmp vec_i, vec
jl .next_vect
XSTR [dest+pos], xp
add pos, 16 ;Loop on 16 bytes at a time
cmp pos, len
jle .loop16
lea tmp, [len + 16]
cmp pos, tmp
je .return_pass
;; Tail len
mov pos, len ;Overlapped offset length-16
jmp .loop16 ;Do one more overlap pass
.return_pass:
mov return, 0
FUNC_RESTORE
ret
.return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060
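All three dot-product variants share the same tail strategy: bias len down by one vector width, loop while pos <= len, and if anything is left over, rewind pos to offset len and run one final overlapping pass. The overlap is safe because each pass fully recomputes the bytes it stores rather than accumulating into memory. A control-flow sketch in Go, with a stub standing in for the 16-byte SSE body:

```go
package main

import "fmt"

// processBlock stands in for one 16-byte SSE iteration; it just records
// which [start, start+16) window was produced.
func processBlock(start int, windows *[][2]int) {
	*windows = append(*windows, [2]int{start, start + 16})
}

// tailLoop mirrors the control flow of the deleted kernels: len is biased by
// -16, the main loop runs while pos <= len, and a final overlapping pass at
// offset len covers any remainder.
func tailLoop(length int) [][2]int {
	var windows [][2]int
	length -= 16
	if length < 0 {
		return nil // .return_fail: fewer than 16 bytes
	}
	pos := 0
	for pos <= length {
		processBlock(pos, &windows)
		pos += 16
	}
	if pos != length+16 { // tail: redo the last 16 bytes, overlapped
		processBlock(length, &windows)
	}
	return windows
}

func main() {
	fmt.Println(tailLoop(40)) // [[0 16] [16 32] [24 40]]
}
```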


@ -1,190 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_vect_mul_avx(len, mul_array, src, dest)
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
%else
%define GF_VECT_MUL_AVX gf_vect_mul_avx
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define return rax
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm13, 2*16
save_xmm128 xmm14, 3*16
save_xmm128 xmm15, 4*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
vmovdqa xmm6, [rsp + 0*16]
vmovdqa xmm7, [rsp + 1*16]
vmovdqa xmm13, [rsp + 2*16]
vmovdqa xmm14, [rsp + 3*16]
vmovdqa xmm15, [rsp + 4*16]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define mul_array arg1
%define src arg2
%define dest arg3
%define pos return
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
%define XLDR vmovdqa
%define XSTR vmovdqa
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft_lo xmm14
%define xgft_hi xmm13
%define x0 xmm0
%define xtmp1a xmm1
%define xtmp1b xmm2
%define xtmp1c xmm3
%define x1 xmm4
%define xtmp2a xmm5
%define xtmp2b xmm6
%define xtmp2c xmm7
align 16
global GF_VECT_MUL_AVX:function
func(GF_VECT_MUL_AVX)
FUNC_SAVE
mov pos, 0
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
vmovdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
loop32:
XLDR x0, [src+pos] ;Get next source vector
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
vpand xtmp1a, x0, xmask0f ;Mask low src nibble in bits 4-0
vpand xtmp2a, x1, xmask0f
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0
vpsraw x1, x1, 4
vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0
vpand x1, x1, xmask0f
vpshufb xtmp1b, xgft_hi, x0 ;Lookup mul table of high nibble
vpshufb xtmp1c, xgft_lo, xtmp1a ;Lookup mul table of low nibble
vpshufb xtmp2b, xgft_hi, x1 ;Lookup mul table of high nibble
vpshufb xtmp2c, xgft_lo, xtmp2a ;Lookup mul table of low nibble
vpxor xtmp1b, xtmp1b, xtmp1c ;GF add high and low partials
vpxor xtmp2b, xtmp2b, xtmp2c
XSTR [dest+pos-32], xtmp1b ;Store result
XSTR [dest+pos-16], xtmp2b ;Store +16B result
jl loop32
return_pass:
FUNC_RESTORE
sub pos, len
ret
return_fail:
FUNC_RESTORE
mov return, 1
ret
endproc_frame
section .data
align 16
mask0f:
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_VECT_MUL_AVX, 01, 02, 0036
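gf_vect_mul_avx multiplies a whole buffer by a single constant: mul_array is one 32-byte low/high table pair and the loop writes dest[i] = c * src[i] over GF(2^8), 32 bytes per iteration with no tail handling, so len is evidently expected to be a multiple of 32. A scalar model of the same operation (0x1d reduction assumed; the expected result for 0xca is worked out by hand in the comment):

```go
package main

import (
	"bytes"
	"fmt"
)

// gfMul: bitwise GF(2^8) multiply (assumed 0x1d reduction polynomial).
func gfMul(a, b byte) byte {
	var p byte
	for b != 0 {
		if b&1 != 0 {
			p ^= a
		}
		hiBit := a & 0x80
		a <<= 1
		if hiBit != 0 {
			a ^= 0x1d
		}
		b >>= 1
	}
	return p
}

// gfVectMul models gf_vect_mul(len, mul_array, src, dest): build the low/high
// nibble tables for the single constant c, then multiply every source byte.
func gfVectMul(c byte, src, dest []byte) {
	var lo, hi [16]byte
	for n := byte(0); n < 16; n++ {
		lo[n] = gfMul(c, n)
		hi[n] = gfMul(c, n<<4)
	}
	for i, b := range src {
		dest[i] = lo[b&0x0f] ^ hi[b>>4]
	}
}

func main() {
	src := []byte{0x00, 0x01, 0x53, 0xca}
	dest := make([]byte, len(src))
	gfVectMul(0x02, src, dest)
	// doubling in GF(2^8): 0xca has its top bit set, so 2*0xca = 0x94 ^ 0x1d = 0x89
	want := []byte{0x00, 0x02, 0xa6, 0x89}
	fmt.Println(bytes.Equal(dest, want))
}
```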


@ -1,196 +0,0 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; gf_vect_mul_sse(len, mul_array, src, dest)
;;;
;;; Author: Gregory Tucker
%ifidn __OUTPUT_FORMAT__, macho64
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
%else
%define GF_VECT_MUL_SSE gf_vect_mul_sse
%endif
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, macho64
%define arg0 rdi
%define arg1 rsi
%define arg2 rdx
%define arg3 rcx
%define arg4 r8
%define arg5 r9
%define tmp r11
%define return rax
%define func(x) x:
%define FUNC_SAVE
%define FUNC_RESTORE
%elifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx
%define arg2 r8
%define arg3 r9
%define return rax
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
%define func(x) proc_frame x
%macro FUNC_SAVE 0
alloc_stack stack_size
save_xmm128 xmm6, 0*16
save_xmm128 xmm7, 1*16
save_xmm128 xmm13, 2*16
save_xmm128 xmm14, 3*16
save_xmm128 xmm15, 4*16
end_prolog
%endmacro
%macro FUNC_RESTORE 0
movdqa xmm6, [rsp + 0*16]
movdqa xmm7, [rsp + 1*16]
movdqa xmm13, [rsp + 2*16]
movdqa xmm14, [rsp + 3*16]
movdqa xmm15, [rsp + 4*16]
add rsp, stack_size
%endmacro
%endif
%define len arg0
%define mul_array arg1
%define src arg2
%define dest arg3
%define pos return
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
%define XLDR movdqa
%define XSTR movdqa
%else
%define XLDR movntdqa
%define XSTR movntdq
%endif
default rel
[bits 64]
section .text
%define xmask0f xmm15
%define xgft_lo xmm14
%define xgft_hi xmm13
%define x0 xmm0
%define xtmp1a xmm1
%define xtmp1b xmm2
%define xtmp1c xmm3
%define x1 xmm4
%define xtmp2a xmm5
%define xtmp2b xmm6
%define xtmp2c xmm7
align 16
global GF_VECT_MUL_SSE:function
func(GF_VECT_MUL_SSE)
FUNC_SAVE
mov pos, 0
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
loop32:
XLDR x0, [src+pos] ;Get next source vector
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
movdqa xtmp1b, xgft_hi ;Reload const array registers
movdqa xtmp1c, xgft_lo
movdqa xtmp2b, xgft_hi
movdqa xtmp2c, xgft_lo
movdqa xtmp1a, x0 ;Keep unshifted copy of src
movdqa xtmp2a, x1
psraw x0, 4 ;Shift to put high nibble into bits 4-0
psraw x1, 4
pand xtmp1a, xmask0f ;Mask low src nibble in bits 4-0
pand xtmp2a, xmask0f
pand x0, xmask0f ;Mask high src nibble in bits 4-0
pand x1, xmask0f
pshufb xtmp1b, x0 ;Lookup mul table of high nibble
pshufb xtmp1c, xtmp1a ;Lookup mul table of low nibble
pshufb xtmp2b, x1
pshufb xtmp2c, xtmp2a
pxor xtmp1b, xtmp1c ;GF add high and low partials
pxor xtmp2b, xtmp2c
XSTR [dest+pos], xtmp1b ;Store result
XSTR [dest+pos+16], xtmp2b ;Store +16B result
add pos, 32 ;Loop on 32 bytes at a time
cmp pos, len
jl loop32
return_pass:
sub pos, len
FUNC_RESTORE
ret
return_fail:
mov return, 1
FUNC_RESTORE
ret
endproc_frame
section .data
align 16
mask0f:
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
dw 0x%4
db 0x%3, 0x%2
%endmacro
;;; func core, ver, snum
slversion GF_VECT_MUL_SSE, 00, 02, 0034


@ -1,38 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
//
// int sizeInt()
// {
// return sizeof(int);
// }
import "C"
import "unsafe"
var (
// See http://golang.org/ref/spec#Numeric_types
sizeInt = int(C.sizeInt())
// sizeInt8 is the byte size of an int8.
sizeInt8 = int(unsafe.Sizeof(int8(0)))
// sizeInt16 is the byte size of an int16.
sizeInt16 = int(unsafe.Sizeof(int16(0)))
// sizeInt32 is the byte size of an int32.
sizeInt32 = int(unsafe.Sizeof(int32(0)))
// sizeInt64 is the byte size of an int64.
sizeInt64 = int(unsafe.Sizeof(int64(0)))
)
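The deleted file above pairs a C sizeof(int) probe with unsafe.Sizeof on the Go side. Below is a standalone sketch of the Go half only, with a hypothetical guard illustrating how such constants might be checked before handing buffers across the cgo boundary; the guard itself is not part of the original package.

```go
package main

import (
	"fmt"
	"unsafe"
)

func main() {
	sizeInt8 := int(unsafe.Sizeof(int8(0)))
	sizeInt32 := int(unsafe.Sizeof(int32(0)))
	sizeInt64 := int(unsafe.Sizeof(int64(0)))
	goInt := int(unsafe.Sizeof(int(0)))

	fmt.Println(sizeInt8, sizeInt32, sizeInt64, goInt)

	// hypothetical guard: refuse to proceed if Go's int is neither 32 nor 64 bits wide
	if goInt != sizeInt32 && goInt != sizeInt64 {
		panic("unexpected int width")
	}
}
```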


@ -1,66 +0,0 @@
/*
* Minimalist Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package erasure
import (
"bytes"
. "github.com/minio-io/check"
)
func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte {
for _, err := range errorIndex {
chunks[err] = nil
}
return chunks
}
func (s *MySuite) TestVanderMondeEncodeDecodeFailure(c *C) {
ep, _ := ValidateParams(k, m, Vandermonde)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
e := NewErasure(ep)
chunks, err := e.Encode(data)
c.Assert(err, IsNil)
errorIndex := []int{0, 3, 5, 9, 11, 13}
chunks = corruptChunks(chunks, errorIndex)
_, err = e.Decode(chunks, len(data))
c.Assert(err, Not(IsNil))
}
func (s *MySuite) TestVanderMondeEncodeDecodeSuccess(c *C) {
ep, _ := ValidateParams(k, m, Vandermonde)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
e := NewErasure(ep)
chunks, err := e.Encode(data)
c.Assert(err, IsNil)
errorIndex := []int{0, 3, 5, 9, 13}
chunks = corruptChunks(chunks, errorIndex)
recoveredData, err := e.Decode(chunks, len(data))
c.Assert(err, IsNil)
if !bytes.Equal(recoveredData, data) {
c.Fatalf("Recovered data mismatches with original data")
}
}
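For context, here is a minimal round-trip sketch using the API these tests exercise (ValidateParams, NewErasure, Encode, Decode). The import path and the 10 data / 5 parity split are assumptions for illustration; the tests themselves use the package constants k and m.

```go
package main

import (
	"fmt"

	"github.com/minio-io/erasure" // assumed import path for the package under test
)

func main() {
	// 10 data and 5 parity blocks; any combination of up to 5 missing blocks
	// should remain recoverable, mirroring the success test above.
	ep, _ := erasure.ValidateParams(10, 5, erasure.Vandermonde)
	e := erasure.NewErasure(ep)

	data := []byte("hello, erasure")
	chunks, err := e.Encode(data)
	if err != nil {
		panic(err)
	}

	// drop three chunks; Decode reconstructs the original bytes
	chunks[0], chunks[3], chunks[7] = nil, nil, nil

	recovered, err := e.Decode(chunks, len(data))
	if err != nil {
		panic(err)
	}
	fmt.Println(string(recovered))
}
```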