Add erasure coding and decoding using Intel Storage Acceleration library

- move contrib/erasure --> contrib/isal - bring in low level 'isal' package for Go for exposing C functions - Implement Erasure 'encoding' Supports - Reed Solomon Codes, Cauchy Codes - Implement Erasure 'decoding' Supports - Reed Solomon Codes, Cauchy Codes - Renames Minios -> Minio at all the references
2025-12-01 22:02:33 -05:00 · 2014-11-05 03:09:40 -08:00
parent cbce7fd66f
commit 1e7515a7df
85 changed files with 839 additions and 5 deletions
--- a/erasure/isal/include/ec-base.h
+++ b/erasure/isal/include/ec-base.h
--- a/erasure/isal/include/erasure-code.h
+++ b/erasure/isal/include/erasure-code.h
@@ -0,0 +1,659 @@
+/**********************************************************************
+  Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _ERASURE_CODE_H_
+#define _ERASURE_CODE_H_
+
+/**
+ *  @file erasure_code.h
+ *  @brief Interface to functions supporting erasure code encode and decode.
+ *
+ *  This file defines the interface to optimized functions used in erasure
+ *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
+ *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
+ *  set of coefficients.  Values for the coefficients are determined by the type
+ *  of erasure code.  Using a general dot product means that any sequence of
+ *  coefficients may be used including erasure codes based on random
+ *  coefficients.
+ *  Multiple versions of dot product are supplied to calculate 1-6 output
+ *  vectors in one pass.
+ *  Base GF multiply and divide functions can be sped up by defining
+ *  GF_LARGE_TABLES at the expense of memory size.
+ *
+ */
+
+#include "gf-vect-mul.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Initialize tables for fast Erasure Code encode and decode.
+ *
+ * Generates the expanded tables needed for fast encode or decode for erasure
+ * codes on blocks of data.  32bytes is generated for each input coefficient.
+ *
+ * @param k      The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param a      Pointer to sets of arrays of input coefficients used to encode
+ *               or decode data.
+ * @param gftbls Pointer to start of space for concatenated output tables
+ *               generated from input coefficients.  Must be of size 32*k*rows.
+ * @returns none
+ */
+
+void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Given a list of source data blocks, generate one or multiple blocks of
+ * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
+ * suitable set of coefficients, this function will perform the fast generation
+ * or decoding of Reed-Solomon type erasure codes.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each block of data (vector) of source or dest data.
+ * @param k      The number of vector sources or rows in the generator matrix
+ * 		 for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data   Array of pointers to source input buffers.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+
+void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
+
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
+ *
+ * Given a list of source data blocks, generate one or multiple blocks of
+ * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
+ * suitable set of coefficients, this function will perform the fast generation
+ * or decoding of Reed-Solomon type erasure codes.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len    Length of each block of data (vector) of source or dest data.
+ * @param k      The number of vector sources or rows in the generator matrix
+ * 		 for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data   Array of pointers to source input buffers.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+
+void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
+
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
+ *
+ * Given a list of source data blocks, generate one or multiple blocks of
+ * encoded data as specified by a matrix of GF(2^8) coefficients.  When given a
+ * suitable set of coefficients, this function will perform the fast generation
+ * or decoding of Reed-Solomon type erasure codes.
+ *
+ * @param len    Length of each block of data (vector) of source or dest data.
+ * @param srcs   The number of vector sources or rows in the generator matrix
+ * 		 for coding.
+ * @param dests  The number of output vectors to concurrently encode/decode.
+ * @param v      Pointer to array of input tables generated from coding
+ * 		 coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param src    Array of pointers to source input buffers.
+ * @param dest   Array of pointers to coded output buffers.
+ * @returns none
+ */
+
+void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, unsigned char **dest);
+
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two ouputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two ouputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs.
+ *
+ * Vector dot product optimized to calculate two ouputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three ouputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three ouputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs.
+ *
+ * Vector dot product optimized to calculate three ouputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four ouputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four ouputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs.
+ *
+ * Vector dot product optimized to calculate four ouputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five ouputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five ouputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs.
+ *
+ * Vector dot product optimized to calculate five ouputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six ouputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ *
+ * @requires SSE4.1
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six ouputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ *
+ * @requires AVX
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs.
+ *
+ * Vector dot product optimized to calculate six ouputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ *
+ * @requires AVX2
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
+			unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs baseline version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients. Only elements 32*CONST*j + 1
+ *               of this array are used, where j = (0, 1, 2...) and CONST is the
+ *               number of elements in the array of input coefficients. The
+ *               elements used correspond to the original input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
+                        unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product, runs appropriate version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len    Length of each vector in bytes. Must be >= 32.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
+                        unsigned char **src, unsigned char *dest);
+
+/**********************************************************************
+ * The remaining are lib support functions used in GF(2^8) operations.
+ */
+
+/**
+ * @brief Single element GF(2^8) multiply.
+ *
+ * @param a  Multiplicand a
+ * @param b  Multiplicand b
+ * @returns  Product of a and b in GF(2^8)
+ */
+
+unsigned char gf_mul(unsigned char a, unsigned char b);
+
+/**
+ * @brief Single element GF(2^8) inverse.
+ *
+ * @param a  Input element
+ * @returns  Field element b such that a x b = {1}
+ */
+
+unsigned char gf_inv(unsigned char a);
+
+/**
+ * @brief Generate a matrix of coefficients to be used for encoding.
+ *
+ * Vandermonde matrix example of encoding coefficients where high portion of
+ * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
+ * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
+ * erasure encoding but does not guarantee invertable for every sub matrix.  For
+ * large k it is possible to find cases where the decode matrix chosen from
+ * sources and parity not in erasure are not invertable. Users may want to
+ * adjust for k > 5.
+ *
+ * @param a  [mxk] array to hold coefficients
+ * @param m  number of rows in matrix corresponding to srcs + parity.
+ * @param k  number of columns in matrix corresponding to srcs.
+ * @returns  none
+ */
+
+void gf_gen_rs_matrix(unsigned char *a, int m, int k);
+
+/**
+ * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
+ *
+ * Cauchy matrix example of encoding coefficients where high portion of matrix
+ * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
+ * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertable.
+ *
+ * @param a  [mxk] array to hold coefficients
+ * @param m  number of rows in matrix corresponding to srcs + parity.
+ * @param k  number of columns in matrix corresponding to srcs.
+ * @returns  none
+ */
+
+void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
+
+/**
+ * @brief Invert a matrix in GF(2^8)
+ *
+ * @param in  input matrix
+ * @param out output matrix such that [in] x [out] = [I] - identity matrix
+ * @param n   size of matrix [nxn]
+ * @returns 0 successful, other fail on singular input matrix
+ */
+
+int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
+
+/*************************************************************/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_ERASURE_CODE_H_
--- a/erasure/isal/include/erasure/tests.h
+++ b/erasure/isal/include/erasure/tests.h
@@ -0,0 +1,81 @@
+/**********************************************************************
+  Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef __ERASURE_TESTS_H
+#define __ERASURE_TESTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Use sys/time.h functions for time
+
+#include <sys/time.h>
+
+struct perf{
+	struct timeval tv;
+};
+
+
+inline int perf_start(struct perf *p)
+{
+	return gettimeofday(&(p->tv), 0);
+}
+inline int perf_stop(struct perf *p)
+{
+	return gettimeofday(&(p->tv), 0);
+}
+
+inline void perf_print(struct perf stop, struct perf start, long long dsize)
+{
+	long long secs = stop.tv.tv_sec - start.tv.tv_sec;
+	long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec;
+
+	printf("runtime = %10lld usecs", usecs);
+	if (dsize != 0) {
+#if 1 // not bug in printf for 32-bit
+		printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024),
+			((double) usecs)/1000000, ((double) dsize) / (double)usecs);
+#else
+		printf(", bandwidth %lld MB ", dsize/(1024*1024));
+		printf("in %.4f sec ",(double)usecs/1000000);
+		printf("= %.2f MB/s\n", (double)dsize/usecs);
+#endif
+	}
+	else
+		printf("\n");
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __ERASURE_TESTS_H
--- a/erasure/isal/include/erasure/types.h
+++ b/erasure/isal/include/erasure/types.h
@@ -0,0 +1,80 @@
+/**********************************************************************
+  Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ *  @file  types.h
+ *  @brief Defines standard width types.
+ *
+ */
+
+#ifndef __ERASURE_TYPES_H
+#define __ERASURE_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef __unix__
+#ifdef __MINGW32__
+# include <_mingw.h>
+#endif
+typedef unsigned __int64 UINT64;
+typedef          __int64  INT64;
+typedef unsigned __int32 UINT32;
+typedef unsigned __int16 UINT16;
+typedef unsigned char    UINT8;
+#else
+typedef unsigned long int  UINT64;
+typedef          long int   INT64;
+typedef unsigned int       UINT32;
+typedef unsigned short int UINT16;
+typedef unsigned char      UINT8;
+#endif
+
+
+#ifdef __unix__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define __forceinline static inline
+#else
+# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
+# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+#endif
+
+#ifdef DEBUG
+# define DEBUG_PRINT(x) printf x
+#else
+# define DEBUG_PRINT(x) do {} while (0)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  //__ERASURE_TYPES_H
--- a/erasure/isal/include/gf-vect-mul.h
+++ b/erasure/isal/include/gf-vect-mul.h
@@ -0,0 +1,148 @@
+/**********************************************************************
+  Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _GF_VECT_MUL_H
+#define _GF_VECT_MUL_H
+
+/**
+ *  @file gf-vect-mul.h
+ *  @brief Interface to functions for vector (block) multiplication in GF(2^8).
+ *
+ *  This file defines the interface to routines used in fast RAID rebuild and
+ *  erasure codes.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /**
+ * @brief GF(2^8) vector multiply by constant.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+
+ * @requires SSE4.1
+ * @param len   Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src   Pointer to src data array. Must be aligned to 32B.
+ * @param dest  Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+ /**
+ * @brief GF(2^8) vector multiply by constant.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+
+ * @requires AVX
+ * @param len   Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src   Pointer to src data array. Must be aligned to 32B.
+ * @param dest  Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+/**
+ * @brief GF(2^8) vector multiply by constant, runs appropriate version.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
+ * Len and src must be aligned to 32B.
+ *
+ * This function determines what instruction sets are enabled
+ * and selects the appropriate version at runtime.
+ *
+ * @param len   Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src   Pointer to src data array. Must be aligned to 32B.
+ * @param dest  Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+
+int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+/**
+ * @brief Initialize 32-byte constant array for GF(2^8) vector multiply
+ *
+ * Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
+ * C{20}, ... , C{f0} } as required by other fast vector multiply
+ * functions.
+ * @param c     Constant input.
+ * @param gftbl Table output.
+ */
+
+void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
+
+
+/**
+ * @brief GF(2^8) vector multiply by constant, runs baseline version.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
+ * and src must be aligned to 32B.
+ *
+ * @param len   Length of vector in bytes. Must be aligned to 32B.
+ * @param a 	Pointer to 32-byte array of pre-calculated constants based on C.
+ * 		only use 2nd element is used.
+ * @param src   Pointer to src data array. Must be aligned to 32B.
+ * @param dest  Pointer to destination data array. Must be aligned to 32B.
+ */
+
+void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
+			unsigned char *dest);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_GF_VECT_MUL_H
--- a/erasure/isal/include/reg-sizes.asm
+++ b/erasure/isal/include/reg-sizes.asm
@@ -0,0 +1,96 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions 
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%define EFLAGS_HAS_CPUID        (1<<21)
+%define FLAG_CPUID1_ECX_CLMUL   (1<<1)
+%define FLAG_CPUID1_EDX_SSE2    (1<<26)
+%define FLAG_CPUID1_ECX_SSE3	(1)
+%define FLAG_CPUID1_ECX_SSE4_1  (1<<19)
+%define FLAG_CPUID1_ECX_SSE4_2  (1<<20)
+%define FLAG_CPUID1_ECX_POPCNT  (1<<23)
+%define FLAG_CPUID1_ECX_AESNI   (1<<25)
+%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
+%define FLAG_CPUID1_ECX_AVX     (1<<28)
+%define FLAG_CPUID1_EBX_AVX2    (1<<5)
+%define FLAG_XGETBV_EAX_XMM_YMM	0x6
+
+%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
+
+; define d and w variants for registers
+
+%define	raxd	eax
+%define raxw	ax
+%define raxb	al
+
+%define	rbxd	ebx
+%define rbxw	bx
+%define rbxb	bl
+
+%define	rcxd	ecx
+%define rcxw	cx
+%define rcxb	cl
+
+%define	rdxd	edx
+%define rdxw	dx
+%define rdxb	dl
+
+%define	rsid	esi
+%define rsiw	si
+%define rsib	sil
+
+%define	rdid	edi
+%define rdiw	di
+%define rdib	dil
+
+%define	rbpd	ebp
+%define rbpw	bp
+%define rbpb	bpl
+
+%define ymm0x xmm0
+%define ymm1x xmm1
+%define ymm2x xmm2
+%define ymm3x xmm3
+%define ymm4x xmm4
+%define ymm5x xmm5
+%define ymm6x xmm6
+%define ymm7x xmm7
+%define ymm8x xmm8
+%define ymm9x xmm9
+%define ymm10x xmm10
+%define ymm11x xmm11
+%define ymm12x xmm12
+%define ymm13x xmm13
+%define ymm14x xmm14
+%define ymm15x xmm15
+
+%define DWORD(reg) reg %+ d
+%define WORD(reg)  reg %+ w
+%define BYTE(reg)  reg %+ b
+
+%define XWORD(reg) reg %+ x