Add large bucket support for erasure coded backend (#5160)

This PR implements an object layer which
combines input erasure sets of XL layers
into a unified namespace.

This object layer extends the existing
erasure coded implementation. The design
assumes that providing > 16 disks is also
a static configuration, i.e. if you started
the setup with 32 disks as 4 sets of 8 disks
per pack, then you would always need to
provide the same 4 sets.
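
As a concrete illustration of that restriction, here is a
minimal Go sketch (validateStaticSets is a hypothetical
helper, not the server's actual validation code):

package main

import (
	"errors"
	"fmt"
)

// validateStaticSets checks that totalDisks divides evenly into
// setCount erasure sets of at most 16 disks each, mirroring the
// static-sets restriction described above.
func validateStaticSets(totalDisks, setCount int) (int, error) {
	if setCount <= 0 || totalDisks%setCount != 0 {
		return 0, errors.New("disks must divide evenly across the configured sets")
	}
	disksPerSet := totalDisks / setCount
	if disksPerSet > 16 {
		return 0, errors.New("each erasure set is limited to 16 disks")
	}
	return disksPerSet, nil
}

func main() {
	// The example above: 32 disks as 4 sets -> 8 disks per set.
	disksPerSet, err := validateStaticSets(32, 4)
	fmt.Println(disksPerSet, err) // 8 <nil>
}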

Some design details and restrictions:

- Objects are distributed using consistent ordering
  to a unique erasure coded set (a minimal sketch of
  this selection follows this list).
- Each pack (erasure set) has its own dsync, so locks
  are synchronized properly at the pack level.
- Each pack still has a maximum of 16 disks
  requirement; you can start with multiple
  such sets statically.
- Sets are a static collection of disks and cannot
  be changed; there is no elastic expansion allowed.
- Sets are a static collection of disks and cannot
  be changed; there is no elastic removal allowed.
- ListObjects() across sets can be noticeably
  slower, since List happens on all servers and
  the results are merged at this sets layer.
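
A minimal sketch of the consistent-ordering set selection
mentioned in the first bullet (the exact hash used by the
implementation may differ; hashForSet is a hypothetical name):

package main

import (
	"fmt"
	"hash/crc32"
)

// hashForSet deterministically maps an object name to one of
// setCount sets, so the same object always lands on the same
// erasure coded set.
func hashForSet(object string, setCount int) int {
	return int(crc32.ChecksumIEEE([]byte(object)) % uint32(setCount))
}

func main() {
	const setCount = 4 // e.g. 32 disks as 4 sets of 8 disks each
	for _, object := range []string{"photos/a.jpg", "photos/b.jpg"} {
		fmt.Printf("%s -> set %d\n", object, hashForSet(object, setCount))
	}
}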

Fixes #5465
Fixes #5464
Fixes #5461
Fixes #5460
Fixes #5459
Fixes #5458
Fixes #5488
Fixes #5489
Fixes #5497
Fixes #5496
Author:    Harshavardhana
Date:      2018-02-15 17:45:57 -08:00
Committer: kannappanr
Parent:    dd80256151
Commit:    fb96779a8a

82 changed files with 5046 additions and 4771 deletions

pkg/bpool/bpool.go (new file, 77 lines added)

@@ -0,0 +1,77 @@
// Original work https://github.com/oxtoacart/bpool borrowed
// only bpool.go licensed under Apache 2.0.
// This file modifies original bpool.go to add one more option
// to provide []byte capacity for better GC management.
/*
 * Minio Cloud Storage (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package bpool

// BytePoolCap implements a leaky pool of []byte in the form of a bounded channel.
type BytePoolCap struct {
	c    chan []byte
	w    int
	wcap int
}

// NewBytePoolCap creates a new BytePool bounded to the given maxSize, with new
// byte arrays sized based on width.
func NewBytePoolCap(maxSize int, width int, capwidth int) (bp *BytePoolCap) {
	return &BytePoolCap{
		c:    make(chan []byte, maxSize),
		w:    width,
		wcap: capwidth,
	}
}

// Get gets a []byte from the BytePool, or creates a new one if none are
// available in the pool.
func (bp *BytePoolCap) Get() (b []byte) {
	select {
	case b = <-bp.c:
		// reuse existing buffer
	default:
		// create new buffer
		if bp.wcap > 0 {
			b = make([]byte, bp.w, bp.wcap)
		} else {
			b = make([]byte, bp.w)
		}
	}
	return
}

// Put returns the given Buffer to the BytePool.
func (bp *BytePoolCap) Put(b []byte) {
	select {
	case bp.c <- b:
		// buffer went back into pool
	default:
		// buffer didn't go back into pool, just discard
	}
}

// Width returns the width of the byte arrays in this pool.
func (bp *BytePoolCap) Width() (n int) {
	return bp.w
}

// WidthCap returns the cap width of the byte arrays in this pool.
func (bp *BytePoolCap) WidthCap() (n int) {
	return bp.wcap
}
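
For context, a short usage sketch of the pool added above
(buffer sizes are illustrative; the import path assumes the
repository layout github.com/minio/minio/pkg/bpool):

package main

import (
	"fmt"

	"github.com/minio/minio/pkg/bpool"
)

func main() {
	// Pool of at most 4 buffers, each 2 MiB long with 4 MiB capacity.
	pool := bpool.NewBytePoolCap(4, 2*1024*1024, 4*1024*1024)

	buf := pool.Get()   // reuses a pooled buffer, or allocates a new one
	defer pool.Put(buf) // returned to the pool, or silently dropped if full

	fmt.Println(len(buf), cap(buf)) // 2097152 4194304
}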

pkg/bpool/bpool_test.go (new file, 96 lines added)

@@ -0,0 +1,96 @@
// Original work https://github.com/oxtoacart/bpool borrowed
// only bpool.go licensed under Apache 2.0.
// This file modifies original bpool.go to add one more option
// to provide []byte capacity for better GC management.
/*
 * Minio Cloud Storage (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package bpool

import "testing"

// Tests - bytePool functionality.
func TestBytePool(t *testing.T) {
	var size = 4
	var width = 10
	var capWidth = 16

	bufPool := NewBytePoolCap(size, width, capWidth)

	// Check the width
	if bufPool.Width() != width {
		t.Fatalf("bytepool width invalid: got %v want %v", bufPool.Width(), width)
	}

	// Check with width cap
	if bufPool.WidthCap() != capWidth {
		t.Fatalf("bytepool capWidth invalid: got %v want %v", bufPool.WidthCap(), capWidth)
	}

	// Check that retrieved buffers are of the expected width and capacity
	b := bufPool.Get()
	if len(b) != width {
		t.Fatalf("bytepool length invalid: got %v want %v", len(b), width)
	}
	if cap(b) != capWidth {
		t.Fatalf("bytepool cap invalid: got %v want %v", cap(b), capWidth)
	}

	bufPool.Put(b)

	// Fill the pool beyond the capped pool size.
	for i := 0; i < size*2; i++ {
		bufPool.Put(make([]byte, bufPool.w))
	}

	b = bufPool.Get()
	if len(b) != width {
		t.Fatalf("bytepool length invalid: got %v want %v", len(b), width)
	}
	if cap(b) != capWidth {
		t.Fatalf("bytepool cap invalid: got %v want %v", cap(b), capWidth)
	}

	bufPool.Put(b)

	// Close the channel so we can iterate over it.
	close(bufPool.c)

	// Check the size of the pool.
	if len(bufPool.c) != size {
		t.Fatalf("bytepool size invalid: got %v want %v", len(bufPool.c), size)
	}

	bufPoolNoCap := NewBytePoolCap(size, width, 0)

	// Check the width
	if bufPoolNoCap.Width() != width {
		t.Fatalf("bytepool width invalid: got %v want %v", bufPoolNoCap.Width(), width)
	}

	// Check with width cap
	if bufPoolNoCap.WidthCap() != 0 {
		t.Fatalf("bytepool capWidth invalid: got %v want %v", bufPoolNoCap.WidthCap(), 0)
	}

	b = bufPoolNoCap.Get()
	if len(b) != width {
		t.Fatalf("bytepool length invalid: got %v want %v", len(b), width)
	}
	if cap(b) != width {
		t.Fatalf("bytepool cap invalid: got %v want %v", cap(b), width)
	}
}