Erasure restructuring

This commit is contained in:
Harshavardhana 2014-11-27 19:43:54 -08:00
parent d6b65f1f04
commit 922e6bac0b
10 changed files with 203 additions and 142 deletions

View File

@ -1,13 +1,8 @@
all: build test
.PHONY: all
test: cauchy vandermonde
cauchy:
@godep go test -test.run="TestCauchy*"
vandermonde:
@godep go test -test.run="TestVanderMonde*"
test:
@godep go test
isal/isal-l.so:
@$(MAKE) --quiet -C isal lib

View File

@ -17,6 +17,7 @@
package erasure
import (
"bytes"
. "gopkg.in/check.v1"
"testing"
)
@ -27,15 +28,19 @@ var _ = Suite(&MySuite{})
func Test(t *testing.T) { TestingT(t) }
func (s *MySuite) TestCachyEncode(c *C) {
ep, _ := ValidateParams(10, 5, 8, CAUCHY)
data := make([]byte, 1000)
_, length := Encode(data, ep)
func (s *MySuite) TestCauchyEncode(c *C) {
ep, _ := ParseEncoderParams(10, 5, CAUCHY)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
chunks, length := Encode(data, ep)
c.Logf("chunks length: %d;\nlength: %d\n", len(chunks), length)
c.Assert(length, Equals, len(data))
}
func (s *MySuite) TestCauchyDecode(c *C) {
ep, _ := ValidateParams(10, 5, 8, CAUCHY)
ep, _ := ParseEncoderParams(10, 5, CAUCHY)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
@ -49,7 +54,9 @@ func (s *MySuite) TestCauchyDecode(c *C) {
chunks[13] = nil
recovered_data, err := Decode(chunks, ep, length)
c.Assert(err, Not(IsNil))
c.Assert(err, IsNil)
c.Assert(recovered_data, DeepEquals, data)
if !bytes.Equal(data, recovered_data) {
c.Fatalf("Recovered data mismatches with original data")
}
}

View File

@ -23,10 +23,10 @@ import (
)
// Integer to Int conversion
func Int2cInt(src_err_list []int) *C.int {
SrcErrInt := int(unsafe.Sizeof(src_err_list[0]))
switch SrcErrInt {
case SizeInt:
func int2cInt(src_err_list []int) *C.int {
var SrcErrInt = int(unsafe.Sizeof(src_err_list[0]))
switch SizeInt {
case SrcErrInt:
return (*C.int)(unsafe.Pointer(&src_err_list[0]))
case SizeInt8:
Int8Array := make([]int8, len(src_err_list))

View File

@ -16,13 +16,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <erasure-code.h>
#include "matrix_decode.h"
#include "decode.h"
static int src_in_err (int r, int *src_err_list)
{
int i;
for (i = 0; src_err_list[i] != -1; i++) {
if (src_err_list[i] == r) {
@ -37,25 +37,25 @@ static int src_in_err (int r, int *src_err_list)
Generate decode matrix during the decoding phase
*/
int gf_gen_decode_matrix (int *src_err_list,
unsigned char *encode_matrix,
unsigned char *decode_matrix,
int k, int n, int errs,
size_t matrix_size)
int minio_init_decoder (int *src_err_list,
unsigned char *encode_matrix,
unsigned char **decode_matrix,
unsigned char **decode_tbls,
int k, int n, int errs)
{
int i, j, r, s, l, z;
unsigned char *input_matrix = NULL;
unsigned char *inverse_matrix = NULL;
unsigned char input_matrix[k * n];
unsigned char inverse_matrix[k * n];
unsigned char *tmp_decode_matrix;
unsigned char *tmp_decode_tbls;
input_matrix = malloc(k * n);
if (!input_matrix) {
tmp_decode_matrix = (unsigned char *) malloc (k * n);
if (!tmp_decode_matrix)
return -1;
}
inverse_matrix = malloc(matrix_size);
if (!inverse_matrix) {
tmp_decode_tbls = (unsigned char *) malloc (k * n * 32);
if (!tmp_decode_tbls)
return -1;
}
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err(r, src_err_list))
@ -74,7 +74,7 @@ int gf_gen_decode_matrix (int *src_err_list,
if (src_err_list[l] < k) {
// decoding matrix elements for data chunks
for (j = 0; j < k; j++) {
decode_matrix[k * l + j] =
tmp_decode_matrix[k * l + j] =
inverse_matrix[k *
src_err_list[l] + j];
}
@ -88,11 +88,14 @@ int gf_gen_decode_matrix (int *src_err_list,
encode_matrix[k *
src_err_list[l] + j]);
}
decode_matrix[k * l + i] = s;
tmp_decode_matrix[k * l + i] = s;
}
}
}
free(input_matrix);
free(inverse_matrix);
ec_init_tables(k, errs, tmp_decode_matrix, tmp_decode_tbls);
*decode_matrix = tmp_decode_matrix;
*decode_tbls = tmp_decode_tbls;
return 0;
}

View File

@ -15,7 +15,6 @@
*/
// +build linux
// amd64
package erasure
@ -25,7 +24,8 @@ package erasure
// #include <erasure-code.h>
// #include <stdlib.h>
//
// #include "matrix_decode.h"
// #include "decode.h"
// #include "encode.h"
import "C"
import (
"errors"
@ -37,17 +37,14 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) {
var decode_matrix *C.uchar
var decode_tbls *C.uchar
var matrix_size C.size_t
var decode_tbls_size C.size_t
k := int(e.p.k)
n := int(e.p.n)
n := int(e.p.k + e.p.m)
if len(chunks) != n {
return nil, errors.New(fmt.Sprintf("chunks length must be %d", n))
}
var chunk_size int = e.CalcChunkSize(length)
chunk_size := int(C.calc_chunk_size(e.k, C.uint(length)))
src_err_list := make([]int, n+1)
var err_count int = 0
@ -67,7 +64,7 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) {
return nil, errors.New("too many erasures requested, can't decode")
}
src_err_list_ptr := Int2cInt(src_err_list[:err_count])
src_err_list_ptr := int2cInt(src_err_list[:err_count])
for i := range chunks {
if chunks[i] == nil || len(chunks[i]) == 0 {
@ -75,36 +72,42 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) {
}
}
matrix_size = C.size_t(k * n)
decode_matrix = (*C.uchar)(unsafe.Pointer(C.malloc(matrix_size)))
C.minio_init_decoder(src_err_list_ptr, e.encode_matrix,
&decode_matrix, &decode_tbls,
e.k, e.k+e.m, C.int(err_count-1))
defer C.free(unsafe.Pointer(decode_matrix))
decode_tbls_size = C.size_t(k * n * 32)
decode_tbls = (*C.uchar)(unsafe.Pointer(C.malloc(decode_tbls_size)))
defer C.free(unsafe.Pointer(decode_tbls))
C.gf_gen_decode_matrix(src_err_list_ptr, e.encode_matrix,
decode_matrix, e.k, e.k+e.m, C.int(err_count-1), matrix_size)
C.ec_init_tables(e.k, C.int(err_count-1), decode_matrix, decode_tbls)
e.decode_matrix = decode_matrix
e.decode_tbls = decode_tbls
pointers := make([]*byte, n)
for i := range chunks {
pointers[i] = &chunks[i][0]
}
/*
// Pack recovery array as list of valid sources
// Its order must be the same as the order
// to generate matrix b in gf_gen_decode_matrix
var i int
for i = 0; i < e.p.k; i++ {
recov[i] = buffs[decode_index[i]]
}
*/
data := (**C.uchar)(unsafe.Pointer(&pointers[:k][0]))
coding := (**C.uchar)(unsafe.Pointer(&pointers[k:][0]))
C.ec_encode_data(C.int(matrix_size), e.k, C.int(err_count-1), e.decode_tbls,
C.ec_encode_data(C.int(chunk_size), e.k, C.int(err_count-1), decode_tbls,
data, coding)
recovered_output := make([]byte, 0, chunk_size*k)
for i := 0; i < k; i++ {
recovered_output = append(recovered_output, chunks[i]...)
}
e.decode_matrix = decode_matrix
e.decode_tbls = decode_tbls
return recovered_output[:length], nil
}

25
pkgs/erasure/decode.h Normal file
View File

@ -0,0 +1,25 @@
/*
* Mini Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __DECODE_H__
#define __DECODE_H__
/*
 * Prepare the decode matrix and the expanded decode tables needed to
 * rebuild missing chunks.
 *
 * src_err_list    - indices of the chunks that are in error; the
 *                   implementation scans it until it finds -1, so the
 *                   list is expected to be terminated by -1
 * encoding_matrix - encode matrix that was used to produce the chunks
 * decode_matrix   - out: receives a buffer of k * n bytes allocated
 *                   with malloc()
 * decode_tbls     - out: receives a buffer of k * n * 32 bytes allocated
 *                   with malloc(), filled in by ec_init_tables()
 * k               - number of data chunks
 * n               - total number of chunks (the Go caller passes k + m)
 * errs            - number of rows handed to ec_init_tables(), i.e. the
 *                   number of chunks to recover
 *
 * Returns 0 on success and -1 when a buffer cannot be allocated.
 * NOTE(review): the out-pointers are only assigned on the success path,
 * so callers should check the return value before using or freeing them.
 */
int minio_init_decoder (int *src_err_list,
unsigned char *encoding_matrix,
unsigned char **decode_matrix,
unsigned char **decode_tbls,
int k, int n, int errs);
#endif /* __DECODE_H__ */

75
pkgs/erasure/encode.c Normal file
View File

@ -0,0 +1,75 @@
/*
* Mini Object Storage, (C) 2014 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <stdio.h>
#include <erasure-code.h>
#include "encode.h"
/*
  Allocate and populate the encode matrix and the expanded encode tables
  for an erasure code with k data chunks and m parity chunks.

  technique      - 0 builds the matrix with gf_gen_rs_matrix(),
                   1 builds it with gf_gen_cauchy1_matrix()
  k              - number of data chunks (must be >= 1)
  m              - number of parity chunks (must be >= 0)
  encode_matrix  - out: buffer of k * (k + m) bytes from malloc()
  encode_tbls    - out: buffer of k * (k + m) * 32 bytes from malloc()

  On success both out-pointers reference freshly allocated buffers.
  On any failure (invalid k or m, unknown technique, allocation failure)
  both out-pointers are set to NULL and no memory is leaked, so the
  caller can detect the error with a NULL check.
*/
void minio_init_encoder (int technique, int k, int m,
                         unsigned char **encode_matrix,
                         unsigned char **encode_tbls)
{
        size_t encode_matrix_size;
        size_t encode_tbls_size;
        unsigned char *tmp_matrix, *tmp_tbls;

        /* Never leave the caller with indeterminate pointers. */
        *encode_matrix = NULL;
        *encode_tbls = NULL;

        if (k < 1 || m < 0)
                return;

        /* Reject unknown techniques up front: otherwise the tables
           would be derived from an uninitialised matrix. */
        if (technique != 0 && technique != 1)
                return;

        /* Do the arithmetic in size_t so the products are not
           truncated to int before being widened. */
        encode_matrix_size = (size_t) k * ((size_t) k + (size_t) m);
        encode_tbls_size = encode_matrix_size * 32;

        tmp_matrix = (unsigned char *) malloc (encode_matrix_size);
        tmp_tbls = (unsigned char *) malloc (encode_tbls_size);
        if (!tmp_matrix || !tmp_tbls) {
                /* free(NULL) is a no-op, so this covers either failure. */
                free (tmp_matrix);
                free (tmp_tbls);
                return;
        }

        if (technique == 0) {
                /*
                  Commonly used method for choosing coefficients in
                  erasure encoding, but it does not guarantee that
                  every sub-matrix is invertible. For large k it is
                  possible to find cases where the decode matrix chosen
                  from sources and parity not in erasure is not
                  invertible. Users may want to adjust for k > 5.
                  -- Intel
                */
                gf_gen_rs_matrix (tmp_matrix, k + m, k);
        } else {
                gf_gen_cauchy1_matrix (tmp_matrix, k + m, k);
        }

        /* The first k rows are skipped; only the remaining m rows
           are expanded into tables. */
        ec_init_tables (k, m, &tmp_matrix[k * k], tmp_tbls);

        *encode_matrix = tmp_matrix;
        *encode_tbls = tmp_tbls;
}
/*
  Compute the per-chunk byte length used when a buffer of split_len
  bytes is spread across k data chunks.

  split_len is rounded up to the next multiple of (k * SIMD_ALIGN) and
  that padded length is divided by k, so the result is always a multiple
  of SIMD_ALIGN.

  k          - number of data chunks
  split_len  - length in bytes of the buffer to be split

  Returns the chunk size in bytes, or 0 when k is not positive (which
  would otherwise be a division by zero) or when split_len is 0.
*/
unsigned int calc_chunk_size (int k, unsigned int split_len)
{
        unsigned int alignment;
        unsigned int blocks;

        if (k < 1)
                return 0;

        /* Stay in unsigned arithmetic: split_len may exceed INT_MAX. */
        alignment = (unsigned int) k * SIMD_ALIGN;

        /* Number of (k * SIMD_ALIGN)-sized blocks, rounded up. */
        blocks = split_len / alignment;
        if (split_len % alignment)
                blocks++;

        /* (blocks * alignment) / k without forming the padded length,
           which could overflow. */
        return blocks * SIMD_ALIGN;
}
/*
void minio_encode (int k, int m, )
{
*/

View File

@ -15,7 +15,6 @@
*/
// +build linux
// amd64
package erasure
@ -25,10 +24,11 @@ package erasure
// #include <erasure-code.h>
// #include <stdlib.h>
//
// #include "cpufeatures.h"
// #include "encode.h"
import "C"
import (
"errors"
//"fmt"
"unsafe"
)
@ -38,24 +38,20 @@ const (
)
const (
K = 10
M = 3
ALIGN = 32
K = 10
M = 3
)
type EncoderParams struct {
k,
m,
w,
n,
technique int // cauchy or vandermonde matrix (RS)
}
type Encoder struct {
p *EncoderParams
k,
m,
w C.int
m C.int
encode_matrix,
encode_tbls,
decode_matrix,
@ -63,7 +59,7 @@ type Encoder struct {
}
// Parameter validation
func ValidateParams(k, m, w, technique int) (*EncoderParams, error) {
func ParseEncoderParams(k, m, technique int) (*EncoderParams, error) {
if k < 1 {
return nil, errors.New("k cannot be zero")
}
@ -76,14 +72,6 @@ func ValidateParams(k, m, w, technique int) (*EncoderParams, error) {
return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1")
}
if 1<<uint(w) < k+m {
return nil, errors.New("Wordsize should be bigger than Galois field GF(2^8) - 1")
}
if w < 0 {
return nil, errors.New("Wordsize cannot be negative")
}
switch technique {
case VANDERMONDE:
break
@ -96,51 +84,25 @@ func ValidateParams(k, m, w, technique int) (*EncoderParams, error) {
return &EncoderParams{
k: k,
m: m,
w: w,
n: k + m,
technique: technique,
}, nil
}
func NewEncoder(ep *EncoderParams) *Encoder {
func newEncoder(ep *EncoderParams) *Encoder {
var k = C.int(ep.k)
var m = C.int(ep.m)
var w = C.int(ep.w)
var n = C.int(ep.n)
var encode_matrix *C.uchar
var encode_tbls *C.uchar
var matrix_size C.size_t
var encode_tbls_size C.size_t
matrix_size = C.size_t(k * n)
encode_matrix = (*C.uchar)(unsafe.Pointer(C.malloc(matrix_size)))
C.minio_init_encoder(C.int(ep.technique), k, m, &encode_matrix,
&encode_tbls)
defer C.free(unsafe.Pointer(encode_matrix))
encode_tbls_size = C.size_t(k * n * 32)
encode_tbls = (*C.uchar)(unsafe.Pointer(C.malloc(encode_tbls_size)))
defer C.free(unsafe.Pointer(encode_tbls))
if ep.technique == VANDERMONDE {
// Commonly used method for choosing coefficients in erasure encoding
// but does not guarantee invertable for every sub matrix. For large
// k it is possible to find cases where the decode matrix chosen from
// sources and parity not in erasure are not invertable. Users may
// want to adjust for k > 5.
// -- Intel
C.gf_gen_rs_matrix(encode_matrix, n, k)
} else if ep.technique == CAUCHY {
C.gf_gen_cauchy1_matrix(encode_matrix, n, k)
}
C.ec_init_tables(k, m, encode_matrix, encode_tbls)
return &Encoder{
p: ep,
k: k,
m: m,
w: w,
encode_matrix: encode_matrix,
encode_tbls: encode_tbls,
decode_matrix: nil,
@ -148,27 +110,15 @@ func NewEncoder(ep *EncoderParams) *Encoder {
}
}
func (e *Encoder) CalcChunkSize(block_len int) int {
var alignment int = ALIGN
var remainder = block_len % alignment
var chunk_size int
chunk_size = block_len
if remainder > 0 {
chunk_size = block_len + (alignment - remainder)
}
return chunk_size / e.p.k
}
func (e *Encoder) Encode(block []byte) ([][]byte, int) {
var block_len = len(block)
chunk_size := e.CalcChunkSize(block_len)
padded_len := chunk_size * e.p.k
chunk_size := int(C.calc_chunk_size(e.k, C.uint(block_len)))
chunk_len := chunk_size * e.p.k
pad_len := chunk_len - block_len
if (padded_len - block_len) > 0 {
s := make([]byte, (padded_len - block_len))
if pad_len > 0 {
s := make([]byte, pad_len)
// Expand with new padded blocks to the byte array
block = append(block, s...)
}
@ -178,8 +128,8 @@ func (e *Encoder) Encode(block []byte) ([][]byte, int) {
block = append(block, c...)
// Allocate chunks
chunks := make([][]byte, e.p.n)
pointers := make([]*byte, e.p.n)
chunks := make([][]byte, e.p.k+e.p.m)
pointers := make([]*byte, e.p.k+e.p.m)
var i int
// Add data blocks to chunks
@ -188,7 +138,7 @@ func (e *Encoder) Encode(block []byte) ([][]byte, int) {
pointers[i] = &chunks[i][0]
}
for i = e.p.k; i < e.p.n; i++ {
for i = e.p.k; i < (e.p.k + e.p.m); i++ {
chunks[i] = make([]byte, chunk_size)
pointers[i] = &chunks[i][0]
}

View File

@ -14,11 +14,12 @@
* limitations under the License.
*/
#ifndef __MATRIX_DECODE_H__
#define __MATRIX_DECODE_H__
#ifndef __ENCODE_H__
#define __ENCODE_H__
int gf_gen_decode_matrix (int *src_err_list,
unsigned char *encoding_matrix,
unsigned char *decode_matrix, int k, int n,
int errs, size_t matrix_size);
#endif /* __MATRIX_DECODE_H__ */
#define SIMD_ALIGN 32
void minio_init_encoder (int technique, int k, int m,
unsigned char **encode_matrix,
unsigned char **encode_tbls);
unsigned int calc_chunk_size (int k, unsigned int split_len);
#endif /* __ENCODE_H__ */

View File

@ -17,27 +17,27 @@
package erasure
import (
"bytes"
. "gopkg.in/check.v1"
)
func (s *MySuite) TestVanderMondeEncode(c *C) {
ep, _ := ValidateParams(10, 5, 8, VANDERMONDE)
p := NewEncoder(ep)
ep, _ := ParseEncoderParams(10, 5, VANDERMONDE)
data := make([]byte, 1000)
chunks, length := p.Encode(data)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
chunks, length := Encode(data, ep)
c.Logf("chunks length: %d;\nlength: %d\n", len(chunks), length)
c.Assert(length, Equals, len(data))
}
func (s *MySuite) TestVanderMondeDecode(c *C) {
ep, _ := ValidateParams(10, 5, 8, VANDERMONDE)
p := NewEncoder(ep)
ep, _ := ParseEncoderParams(10, 5, VANDERMONDE)
data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.")
chunks, length := p.Encode(data)
chunks, length := Encode(data, ep)
c.Logf("chunks length: %d", len(chunks))
c.Logf("length: %d", length)
c.Assert(length, Equals, len(data))
@ -48,8 +48,10 @@ func (s *MySuite) TestVanderMondeDecode(c *C) {
chunks[9] = nil
chunks[13] = nil
recovered_data, err := p.Decode(chunks, length)
c.Assert(err, Not(IsNil))
recovered_data, err := Decode(chunks, ep, length)
c.Assert(err, IsNil)
c.Assert(recovered_data, DeepEquals, data)
if !bytes.Equal(recovered_data, data) {
c.Fatalf("Recovered data mismatches with original data")
}
}