From d5ce2f694466d6e034ea0ca490f6cb2da781619a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 5 Oct 2015 22:33:39 -0700 Subject: [PATCH] Make erasure matrix type not optional choose automatically Remove the option of providing a Technique, along with the error handling based on it; instead, choose a matrix type automatically based on the number of data blocks. Intel recommends using Cauchy for consistently invertible matrices; while Vandermonde is faster, we should default to Cauchy for large data blocks. --- pkg/donut/bucket.go | 9 +-- pkg/donut/definitions.go | 9 ++- pkg/donut/encoder.go | 38 +++-------- pkg/donut/errors.go | 16 ----- pkg/erasure/doc.go | 7 +- pkg/erasure/{ec_isal-l.h => ec.h} | 0 pkg/erasure/ec_minio_common.h | 2 +- pkg/erasure/ec_minio_decode.c | 2 +- pkg/erasure/ec_minio_encode.c | 10 ++- pkg/erasure/erasure_decode.go | 2 +- pkg/erasure/erasure_encode.go | 42 ++---------- .../{cauchy_test.go => erasure_test.go} | 39 +++++++++-- pkg/erasure/vandermonde_test.go | 66 ------------------- 13 files changed, 67 insertions(+), 175 deletions(-) rename pkg/erasure/{ec_isal-l.h => ec.h} (100%) rename pkg/erasure/{cauchy_test.go => erasure_test.go} (74%) delete mode 100644 pkg/erasure/vandermonde_test.go diff --git a/pkg/donut/bucket.go b/pkg/donut/bucket.go index 089503356..be446b36d 100644 --- a/pkg/donut/bucket.go +++ b/pkg/donut/bucket.go @@ -288,7 +288,6 @@ func (b bucket) WriteObject(objectName string, objectData io.Reader, size int64, objMetadata.ChunkCount = chunkCount objMetadata.DataDisks = k objMetadata.ParityDisks = m - objMetadata.ErasureTechnique = "Cauchy" objMetadata.Size = int64(totalLength) } objMetadata.Bucket = b.getBucketName() @@ -431,7 +430,7 @@ func (b bucket) getDataAndParity(totalWriters int) (k uint8, m uint8, err *probe // writeObjectData - func (b bucket) writeObjectData(k, m uint8, writers []io.WriteCloser, objectData io.Reader, size int64, hashWriter io.Writer) (int, int, *probe.Error) { - encoder, err := newEncoder(k, m, "Cauchy") + encoder, 
err := newEncoder(k, m) if err != nil { return 0, 0, err.Trace() } @@ -503,11 +502,7 @@ func (b bucket) readObjectData(objectName string, writer *io.PipeWriter, objMeta mwriter := io.MultiWriter(writer, hasher, sum512hasher) switch len(readers) > 1 { case true: - if objMetadata.ErasureTechnique == "" { - writer.CloseWithError(probe.WrapError(probe.NewError(MissingErasureTechnique{}))) - return - } - encoder, err := newEncoder(objMetadata.DataDisks, objMetadata.ParityDisks, objMetadata.ErasureTechnique) + encoder, err := newEncoder(objMetadata.DataDisks, objMetadata.ParityDisks) if err != nil { writer.CloseWithError(probe.WrapError(err)) return diff --git a/pkg/donut/definitions.go b/pkg/donut/definitions.go index 7da3c4a44..2fe1f8f1c 100644 --- a/pkg/donut/definitions.go +++ b/pkg/donut/definitions.go @@ -30,11 +30,10 @@ type ObjectMetadata struct { Size int64 `json:"size"` // erasure - DataDisks uint8 `json:"sys.erasureK"` - ParityDisks uint8 `json:"sys.erasureM"` - ErasureTechnique string `json:"sys.erasureTechnique"` - BlockSize int `json:"sys.blockSize"` - ChunkCount int `json:"sys.chunkCount"` + DataDisks uint8 `json:"sys.erasureK"` + ParityDisks uint8 `json:"sys.erasureM"` + BlockSize int `json:"sys.blockSize"` + ChunkCount int `json:"sys.chunkCount"` // checksums MD5Sum string `json:"sys.md5sum"` diff --git a/pkg/donut/encoder.go b/pkg/donut/encoder.go index 6c3161383..056ea586f 100644 --- a/pkg/donut/encoder.go +++ b/pkg/donut/encoder.go @@ -23,41 +23,21 @@ import ( // encoder internal struct type encoder struct { - encoder *encoding.Erasure - k, m uint8 - technique encoding.Technique -} - -// getErasureTechnique - convert technique string into Technique type -func getErasureTechnique(technique string) (encoding.Technique, *probe.Error) { - switch true { - case technique == "Cauchy": - return encoding.Cauchy, nil - case technique == "Vandermonde": - return encoding.Cauchy, nil - default: - return encoding.None, 
probe.NewError(InvalidErasureTechnique{Technique: technique}) - } + encoder *encoding.Erasure + k, m uint8 } // newEncoder - instantiate a new encoder -func newEncoder(k, m uint8, technique string) (encoder, *probe.Error) { +func newEncoder(k, m uint8) (encoder, *probe.Error) { e := encoder{} - t, err := getErasureTechnique(technique) + params, err := encoding.ValidateParams(k, m) if err != nil { - return encoder{}, err.Trace() - } - { - params, err := encoding.ValidateParams(k, m, t) - if err != nil { - return encoder{}, probe.NewError(err) - } - e.encoder = encoding.NewErasure(params) - e.k = k - e.m = m - e.technique = t - return e, nil + return encoder{}, probe.NewError(err) } + e.encoder = encoding.NewErasure(params) + e.k = k + e.m = m + return e, nil } // TODO - think again if this is needed diff --git a/pkg/donut/errors.go b/pkg/donut/errors.go index 52d793c29..e5c97e803 100644 --- a/pkg/donut/errors.go +++ b/pkg/donut/errors.go @@ -132,22 +132,6 @@ func (e MissingPOSTPolicy) Error() string { return "Missing POST policy in multipart form" } -// MissingErasureTechnique missing erasure technique -type MissingErasureTechnique struct{} - -func (e MissingErasureTechnique) Error() string { - return "Missing erasure technique" -} - -// InvalidErasureTechnique invalid erasure technique -type InvalidErasureTechnique struct { - Technique string -} - -func (e InvalidErasureTechnique) Error() string { - return "Invalid erasure technique: " + e.Technique -} - // InternalError - generic internal error type InternalError struct { } diff --git a/pkg/erasure/doc.go b/pkg/erasure/doc.go index ce47fb64d..4e7edbc63 100644 --- a/pkg/erasure/doc.go +++ b/pkg/erasure/doc.go @@ -39,10 +39,9 @@ // // But here are the few points to keep in mind // -// Techniques: +// Matrix Type: // - Vandermonde is most commonly used method for choosing coefficients in erasure // encoding but does not guarantee invertable for every sub matrix. -// Users may want to adjust for k > 5. 
(k is data blocks) // - Whereas Cauchy is our recommended method for choosing coefficients in erasure coding. // Since any sub-matrix of a Cauchy matrix is invertable. // @@ -53,14 +52,14 @@ // // Creating and using an encoder // var bytes []byte -// params := erasure.ValidateParams(10, 5, erasure.Cauchy) +// params := erasure.ValidateParams(10, 5) // encoder := erasure.NewErasure(params) // encodedData, length := encoder.Encode(bytes) // // Creating and using a decoder // var encodedData [][]byte // var length int -// params := erasure.ValidateParams(10, 5, erasure.Cauchy) +// params := erasure.ValidateParams(10, 5) // encoder := erasure.NewErasure(params) // originalData, err := encoder.Decode(encodedData, length) // diff --git a/pkg/erasure/ec_isal-l.h b/pkg/erasure/ec.h similarity index 100% rename from pkg/erasure/ec_isal-l.h rename to pkg/erasure/ec.h diff --git a/pkg/erasure/ec_minio_common.h b/pkg/erasure/ec_minio_common.h index f55897a4c..ee7965c22 100644 --- a/pkg/erasure/ec_minio_common.h +++ b/pkg/erasure/ec_minio_common.h @@ -19,7 +19,7 @@ #include -int32_t minio_init_encoder (int technique, int k, int m, +int32_t minio_init_encoder (int k, int m, unsigned char **encode_matrix, unsigned char **encode_tbls); diff --git a/pkg/erasure/ec_minio_decode.c b/pkg/erasure/ec_minio_decode.c index 457d68230..8c6f683e7 100644 --- a/pkg/erasure/ec_minio_decode.c +++ b/pkg/erasure/ec_minio_decode.c @@ -18,7 +18,7 @@ #include #include -#include "ec_isal-l.h" +#include "ec.h" #include "ec_minio_common.h" static diff --git a/pkg/erasure/ec_minio_encode.c b/pkg/erasure/ec_minio_encode.c index 8b3288350..8270bd1a6 100644 --- a/pkg/erasure/ec_minio_encode.c +++ b/pkg/erasure/ec_minio_encode.c @@ -17,16 +17,14 @@ #include #include -#include "ec_isal-l.h" +#include "ec.h" #include "ec_minio_common.h" /* Generate encode matrix during the encoding phase */ -int32_t minio_init_encoder (int technique, int k, int m, - unsigned char **encode_matrix, - unsigned char **encode_tbls) 
+int32_t minio_init_encoder (int k, int m, unsigned char **encode_matrix, unsigned char **encode_tbls) { unsigned char *tmp_matrix; unsigned char *tmp_tbls; @@ -34,7 +32,7 @@ int32_t minio_init_encoder (int technique, int k, int m, tmp_matrix = (unsigned char *) malloc (k * (k + m)); tmp_tbls = (unsigned char *) malloc (k * (k + m) * 32); - if (technique == 0) { + if (k < 5) { /* Commonly used method for choosing coefficients in erasure encoding but does not guarantee invertable for every sub @@ -44,7 +42,7 @@ int32_t minio_init_encoder (int technique, int k, int m, -- Intel */ gf_gen_rs_matrix (tmp_matrix, k + m, k); - } else if (technique == 1) { + } else { gf_gen_cauchy1_matrix (tmp_matrix, k + m, k); } diff --git a/pkg/erasure/erasure_decode.go b/pkg/erasure/erasure_decode.go index 23ea31520..ee7dcac5b 100644 --- a/pkg/erasure/erasure_decode.go +++ b/pkg/erasure/erasure_decode.go @@ -18,7 +18,7 @@ package erasure // #cgo CFLAGS: -O0 // #include -// #include "ec_isal-l.h" +// #include "ec.h" // #include "ec_minio_common.h" import "C" import ( diff --git a/pkg/erasure/erasure_encode.go b/pkg/erasure/erasure_encode.go index 46c8c6ec7..8a5a92089 100644 --- a/pkg/erasure/erasure_encode.go +++ b/pkg/erasure/erasure_encode.go @@ -17,7 +17,7 @@ package erasure // #include -// #include "ec_isal-l.h" +// #include "ec.h" // #include "ec_minio_common.h" import "C" import ( @@ -25,22 +25,6 @@ import ( "unsafe" ) -// Technique - type of matrix type used in encoding -type Technique uint8 - -// Different types of supported matrix types -const ( - Vandermonde Technique = iota - Cauchy - None -) - -// Default Data and Parity blocks -const ( - K = 10 - M = 3 -) - // Block alignment const ( SIMDAlign = 32 @@ -48,9 +32,8 @@ const ( // Params is a configuration set for building an encoder. It is created using ValidateParams(). 
type Params struct { - K uint8 - M uint8 - Technique Technique // cauchy or vandermonde matrix (RS) + K uint8 + M uint8 } // Erasure is an object used to encode and decode data. @@ -66,7 +49,7 @@ type Erasure struct { // k and m represent the matrix size, which corresponds to the protection level // technique is the matrix type. Valid inputs are Cauchy (recommended) or Vandermonde. // -func ValidateParams(k, m uint8, technique Technique) (*Params, error) { +func ValidateParams(k, m uint8) (*Params, error) { if k < 1 { return nil, errors.New("k cannot be zero") } @@ -79,19 +62,9 @@ func ValidateParams(k, m uint8, technique Technique) (*Params, error) { return nil, errors.New("(k + m) cannot be bigger than Galois field GF(2^8) - 1") } - switch technique { - case Vandermonde: - break - case Cauchy: - break - default: - return nil, errors.New("Technique can be either vandermonde or cauchy") - } - return &Params{ - K: k, - M: m, - Technique: technique, + K: k, + M: m, }, nil } @@ -103,8 +76,7 @@ func NewErasure(ep *Params) *Erasure { var encodeMatrix *C.uchar var encodeTbls *C.uchar - C.minio_init_encoder(C.int(ep.Technique), k, m, &encodeMatrix, - &encodeTbls) + C.minio_init_encoder(k, m, &encodeMatrix, &encodeTbls) return &Erasure{ params: ep, diff --git a/pkg/erasure/cauchy_test.go b/pkg/erasure/erasure_test.go similarity index 74% rename from pkg/erasure/cauchy_test.go rename to pkg/erasure/erasure_test.go index 3118ebebb..b54666a86 100644 --- a/pkg/erasure/cauchy_test.go +++ b/pkg/erasure/erasure_test.go @@ -34,8 +34,15 @@ const ( m = 5 ) -func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) { - ep, _ := ValidateParams(k, m, Cauchy) +func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte { + for _, err := range errorIndex { + chunks[err] = nil + } + return chunks +} + +func (s *MySuite) TestEncodeDecodeFailure(c *C) { + ep, _ := ValidateParams(k, m) data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. 
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") @@ -50,8 +57,8 @@ func (s *MySuite) TestCauchyEncodeDecodeFailure(c *C) { c.Assert(err, Not(IsNil)) } -func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) { - ep, _ := ValidateParams(k, m, Cauchy) +func (s *MySuite) TestEncodeDecodeSuccess(c *C) { + ep, _ := ValidateParams(k, m) data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. 
It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") @@ -69,3 +76,27 @@ func (s *MySuite) TestCauchyEncodeDecodeSuccess(c *C) { c.Fatalf("Recovered data mismatches with original data") } } + +func (s *MySuite) TestEncodeDecodeSuccessBuffer(c *C) { + ep, _ := ValidateParams(k, m) + + tmpBuffer := new(bytes.Buffer) + for i := 0; i < 1024*1024; i++ { + tmpBuffer.Write([]byte("Hello world, hello world")) + } + + e := NewErasure(ep) + chunks, err := e.Encode(tmpBuffer.Bytes()) + c.Assert(err, IsNil) + + errorIndex := []int{0, 3, 5, 9, 13} + chunks = corruptChunks(chunks, errorIndex) + + recoveredData, err := e.Decode(chunks, len(tmpBuffer.Bytes())) + c.Assert(err, IsNil) + + if !bytes.Equal(tmpBuffer.Bytes(), recoveredData) { + c.Fatalf("Recovered data mismatches with original data") + } + +} diff --git a/pkg/erasure/vandermonde_test.go b/pkg/erasure/vandermonde_test.go deleted file mode 100644 index 960779d91..000000000 --- a/pkg/erasure/vandermonde_test.go +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2014 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package erasure - -import ( - "bytes" - - . 
"gopkg.in/check.v1" -) - -func corruptChunks(chunks [][]byte, errorIndex []int) [][]byte { - for _, err := range errorIndex { - chunks[err] = nil - } - return chunks -} - -func (s *MySuite) TestVanderMondeEncodeDecodeFailure(c *C) { - ep, _ := ValidateParams(k, m, Vandermonde) - - data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") - - e := NewErasure(ep) - chunks, err := e.Encode(data) - c.Assert(err, IsNil) - - errorIndex := []int{0, 3, 5, 9, 11, 13} - chunks = corruptChunks(chunks, errorIndex) - - _, err = e.Decode(chunks, len(data)) - c.Assert(err, Not(IsNil)) -} - -func (s *MySuite) TestVanderMondeEncodeDecodeSuccess(c *C) { - ep, _ := ValidateParams(k, m, Vandermonde) - - data := []byte("Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. 
It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.") - - e := NewErasure(ep) - chunks, err := e.Encode(data) - c.Assert(err, IsNil) - - errorIndex := []int{0, 3, 5, 9, 13} - chunks = corruptChunks(chunks, errorIndex) - - recoveredData, err := e.Decode(chunks, len(data)) - c.Assert(err, IsNil) - - if !bytes.Equal(recoveredData, data) { - c.Fatalf("Recovered data mismatches with original data") - } -}