erasure encode janitor duty

This commit is contained in:
Anand Babu (AB) Periasamy 2015-03-24 18:44:21 -07:00
parent 62b69048ec
commit fa9b1f341b
4 changed files with 99 additions and 86 deletions

View File

@ -24,7 +24,7 @@ import (
) )
// Integer to Int conversion // Integer to Int conversion
func int2cInt(src_err_list []int) *C.int32_t { func int2CInt(src_err_list []int) *C.int32_t {
var sizeErrInt = int(unsafe.Sizeof(src_err_list[0])) var sizeErrInt = int(unsafe.Sizeof(src_err_list[0]))
switch sizeInt { switch sizeInt {
case sizeErrInt: case sizeErrInt:

View File

@ -32,47 +32,47 @@ import (
// //
// Decoded data is exactly similar in length and content as the original data. // Decoded data is exactly similar in length and content as the original data.
func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) { func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) {
var decode_matrix *C.uint8_t var decodeMatrix *C.uint8_t
var decode_tbls *C.uint8_t var decodeTbls *C.uint8_t
var decode_index *C.uint32_t var decodeIndex *C.uint32_t
var source, target **C.uint8_t var source, target **C.uint8_t
k := e.params.K k := int(e.params.K)
m := e.params.M m := int(e.params.M)
n := k + m n := k + m
if len(chunks) != int(n) { if len(chunks) != n {
return nil, errors.New(fmt.Sprintf("chunks length must be %d", n)) return nil, errors.New(fmt.Sprintf("chunks length must be %d", n))
} }
chunk_size := GetEncodedChunkLen(length, uint8(k)) chunkLen := GetEncodedBlockLen(length, uint8(k))
error_index := make([]int, n+1) errorIndex := make([]int, n+1)
var err_count int = 0 var errCount int = 0
for i := range chunks { for i := range chunks {
// Check of chunks are really null // Check of chunks are really null
if chunks[i] == nil || len(chunks[i]) == 0 { if chunks[i] == nil || len(chunks[i]) == 0 {
error_index[err_count] = i errorIndex[errCount] = i
err_count++ errCount++
} }
} }
error_index[err_count] = -1 errorIndex[errCount] = -1
err_count++ errCount++
// Too many missing chunks, cannot be more than parity `m` // Too many missing chunks, cannot be more than parity `m`
if err_count-1 > int(n-k) { if errCount-1 > int(n-k) {
return nil, errors.New("too many erasures requested, can't decode") return nil, errors.New("too many erasures requested, can't decode")
} }
error_index_ptr := int2cInt(error_index[:err_count]) errorIndex_ptr := int2CInt(errorIndex[:errCount])
for i := range chunks { for i := range chunks {
if chunks[i] == nil || len(chunks[i]) == 0 { if chunks[i] == nil || len(chunks[i]) == 0 {
chunks[i] = make([]byte, chunk_size) chunks[i] = make([]byte, chunkLen)
} }
} }
C.minio_init_decoder(error_index_ptr, C.int(k), C.int(n), C.int(err_count-1), C.minio_init_decoder(errorIndex_ptr, C.int(k), C.int(n), C.int(errCount-1),
e.encode_matrix, &decode_matrix, &decode_tbls, &decode_index) e.encodeMatrix, &decodeMatrix, &decodeTbls, &decodeIndex)
pointers := make([]*byte, n) pointers := make([]*byte, n)
for i := range chunks { for i := range chunks {
@ -81,24 +81,24 @@ func (e *Encoder) Decode(chunks [][]byte, length int) ([]byte, error) {
data := (**C.uint8_t)(unsafe.Pointer(&pointers[0])) data := (**C.uint8_t)(unsafe.Pointer(&pointers[0]))
ret := C.minio_get_source_target(C.int(err_count-1), C.int(k), C.int(m), error_index_ptr, ret := C.minio_get_source_target(C.int(errCount-1), C.int(k), C.int(m), errorIndex_ptr,
decode_index, data, &source, &target) decodeIndex, data, &source, &target)
if int(ret) == -1 { if int(ret) == -1 {
return nil, errors.New("Decoding source target failed") return nil, errors.New("Decoding source target failed")
} }
C.ec_encode_data(C.int(chunk_size), C.int(k), C.int(err_count-1), decode_tbls, C.ec_encode_data(C.int(chunkLen), C.int(k), C.int(errCount-1), decodeTbls,
source, target) source, target)
recovered_output := make([]byte, 0, chunk_size*int(k)) recoveredOutput := make([]byte, 0, chunkLen*int(k))
for i := 0; i < int(k); i++ { for i := 0; i < int(k); i++ {
recovered_output = append(recovered_output, chunks[i]...) recoveredOutput = append(recoveredOutput, chunks[i]...)
} }
// TODO cache this if necessary // TODO cache this if necessary
e.decode_matrix = decode_matrix e.decodeMatrix = decodeMatrix
e.decode_tbls = decode_tbls e.decodeTbls = decodeTbls
return recovered_output[:length], nil return recoveredOutput[:length], nil
} }

View File

@ -26,7 +26,7 @@ import (
"unsafe" "unsafe"
) )
type Technique int type Technique uint8
const ( const (
Vandermonde Technique = iota Vandermonde Technique = iota
@ -52,10 +52,10 @@ type EncoderParams struct {
// Encoder is an object used to encode and decode data. // Encoder is an object used to encode and decode data.
type Encoder struct { type Encoder struct {
params *EncoderParams params *EncoderParams
encode_matrix, encodeMatrix,
encode_tbls, encodeTbls,
decode_matrix, decodeMatrix,
decode_tbls *C.uint8_t decodeTbls *C.uint8_t
} }
// ParseEncoderParams creates an EncoderParams object. // ParseEncoderParams creates an EncoderParams object.
@ -94,30 +94,30 @@ func ParseEncoderParams(k, m uint8, technique Technique) (*EncoderParams, error)
// NewEncoder creates an encoder object with a given set of parameters. // NewEncoder creates an encoder object with a given set of parameters.
func NewEncoder(ep *EncoderParams) *Encoder { func NewEncoder(ep *EncoderParams) *Encoder {
var encode_matrix *C.uint8_t var k = C.int(ep.K)
var encode_tbls *C.uint8_t var m = C.int(ep.M)
k := C.int(ep.K) var encodeMatrix *C.uint8_t
m := C.int(ep.M) var encodeTbls *C.uint8_t
C.minio_init_encoder(C.int(ep.Technique), k, m, &encode_matrix, C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix,
&encode_tbls) &encodeTbls)
return &Encoder{ return &Encoder{
params: ep, params: ep,
encode_matrix: encode_matrix, encodeMatrix: encodeMatrix,
encode_tbls: encode_tbls, encodeTbls: encodeTbls,
decode_matrix: nil, decodeMatrix: nil,
decode_tbls: nil, decodeTbls: nil,
} }
} }
func GetEncodedLen(inputLen int, k, m uint8) (outputLen int) { func GetEncodedBlocksLen(inputLen int, k, m uint8) (outputLen int) {
outputLen = GetEncodedChunkLen(inputLen, k) * int(k+m) outputLen = GetEncodedBlockLen(inputLen, k) * int(k+m)
return outputLen return outputLen
} }
func GetEncodedChunkLen(inputLen int, k uint8) (outputChunkLen int) { func GetEncodedBlockLen(inputLen int, k uint8) (encodedOutputLen int) {
alignment := int(k) * SIMDAlign alignment := int(k) * SIMDAlign
remainder := inputLen % alignment remainder := inputLen % alignment
@ -125,55 +125,66 @@ func GetEncodedChunkLen(inputLen int, k uint8) (outputChunkLen int) {
if remainder != 0 { if remainder != 0 {
paddedInputLen = inputLen + (alignment - remainder) paddedInputLen = inputLen + (alignment - remainder)
} }
outputChunkLen = paddedInputLen / int(k) encodedOutputLen = paddedInputLen / int(k)
return outputChunkLen return encodedOutputLen
} }
// Encode encodes a block of data. The input is the original data. The output // Encode erasure codes a block of data in "k" data blocks and "m" parity blocks.
// is a 2 tuple containing (k + m) chunks of erasure encoded data and the // Output is [k+m][]blocks of data and parity slices.
// length of the original object. func (e *Encoder) Encode(inputData []byte) (encodedBlocks [][]byte, err error) {
func (e *Encoder) Encode(input []byte) ([][]byte, error) { k := int(e.params.K) // "k" data blocks
inputLen := len(input) m := int(e.params.M) // "m" parity blocks
k := C.int(e.params.K) n := k + m // "n" total encoded blocks
m := C.int(e.params.M)
n := k + m
chunkLen := GetEncodedChunkLen(inputLen, e.params.K) // Length of a single encoded chunk.
encodedDataLen := chunkLen * int(k) // Total number of encoded chunks = "k" data + "m" parity blocks
paddedDataLen := int(encodedDataLen) - inputLen encodedBlockLen := GetEncodedBlockLen(len(inputData), uint8(k))
if paddedDataLen > 0 { // Length of total number of "k" data chunks
s := make([]byte, paddedDataLen) encodedDataBlocksLen := encodedBlockLen * k
// Length of extra padding required for the data blocks.
encodedDataBlocksPadLen := encodedDataBlocksLen - len(inputData)
// Extend inputData buffer to accommodate coded data blocks if necesssary
if encodedDataBlocksPadLen > 0 {
padding := make([]byte, encodedDataBlocksPadLen)
// Expand with new padded blocks to the byte array // Expand with new padded blocks to the byte array
input = append(input, s...) inputData = append(inputData, padding...)
} }
encodedParityLen := chunkLen * int(e.params.M) // Extend inputData buffer to accommodate coded parity blocks
c := make([]byte, encodedParityLen) if true { // create a temporary scope to trigger garbage collect
input = append(input, c...) encodedParityBlocksLen := encodedBlockLen * m
parityBlocks := make([]byte, encodedParityBlocksLen)
// encodedOutLen := encodedDataLen + encodedParityLen inputData = append(inputData, parityBlocks...)
// Allocate chunks
chunks := make([][]byte, k+m)
pointers := make([]*byte, k+m)
var i int
// Add data blocks to chunks
for i = 0; i < int(k); i++ {
chunks[i] = input[i*chunkLen : (i+1)*chunkLen]
pointers[i] = &chunks[i][0]
} }
for i = int(k); i < int(n); i++ { // Allocate memory to the "encoded blocks" return buffer
chunks[i] = make([]byte, chunkLen) encodedBlocks = make([][]byte, n) // Return buffer
pointers[i] = &chunks[i][0]
// Nessary to bridge Go to the C world. C requires 2D arry of pointers to
// byte array. "encodedBlocks" is a 2D slice.
pointersToEncodedBlock := make([]*byte, n) // Pointers to encoded blocks.
// Copy data block slices to encoded block buffer
for i := 0; i < k; i++ {
encodedBlocks[i] = inputData[i*encodedBlockLen : (i+1)*encodedBlockLen]
pointersToEncodedBlock[i] = &encodedBlocks[i][0]
} }
data := (**C.uint8_t)(unsafe.Pointer(&pointers[:k][0])) // Copy erasure block slices to encoded block buffer
coding := (**C.uint8_t)(unsafe.Pointer(&pointers[k:][0])) for i := k; i < n; i++ {
encodedBlocks[i] = make([]byte, encodedBlockLen)
pointersToEncodedBlock[i] = &encodedBlocks[i][0]
}
C.ec_encode_data(C.int(chunkLen), k, m, e.encode_tbls, data, // Erasure code the data into K data blocks and M parity
coding) // blocks. Only the parity blocks are filled. Data blocks remain
return chunks, nil // intact.
C.ec_encode_data(C.int(encodedBlockLen), C.int(k), C.int(m), e.encodeTbls,
(**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[:k][0])), // Pointers to data blocks
(**C.uint8_t)(unsafe.Pointer(&pointersToEncodedBlock[k:][0]))) // Pointers to parity blocks
return encodedBlocks, nil
} }

View File

@ -75,7 +75,9 @@ func erasureReader(readers []io.ReadCloser, donutMetadata map[string]string, wri
if blockSize < totalLeft { if blockSize < totalLeft {
curBlockSize = blockSize curBlockSize = blockSize
} }
curChunkSize := erasure.GetEncodedChunkLen(curBlockSize, uint8(k))
curChunkSize := erasure.GetEncodedBlockLen(curBlockSize, uint8(k))
encodedBytes := make([][]byte, 16) encodedBytes := make([][]byte, 16)
for i, reader := range readers { for i, reader := range readers {
var bytesBuffer bytes.Buffer var bytesBuffer bytes.Buffer