Mirror of https://github.com/minio/minio.git, synced 2025-11-10 05:59:43 -05:00
Add large bucket support for erasure coded backend (#5160)
This PR implements an object layer which combines multiple input erasure sets of XL layers into a unified namespace. This object layer extends the existing erasure coded implementation; the design assumes that providing more than 16 disks is a static configuration as well, i.e. if you started the setup with 32 disks as 4 sets of 8 disks per pack, then you would always need to provide 4 sets.

Some design details and restrictions:

- Objects are distributed using consistent ordering to a unique erasure coded layer.
- Each pack has its own dsync, so locks are synchronized properly at the pack (erasure layer).
- Each pack still has a maximum requirement of 16 disks; you can start with multiple such sets statically.
- Sets are a static collection of disks and cannot be changed; there is no elastic expansion allowed.
- Sets are a static collection of disks and cannot be changed; there is no elastic removal allowed.
- ListObjects() across sets can be noticeably slower, since listing happens on all servers and is merged at this sets layer.

Fixes #5465 Fixes #5464 Fixes #5461 Fixes #5460 Fixes #5459 Fixes #5458 Fixes #5488 Fixes #5489 Fixes #5497 Fixes #5496
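To make the "consistent ordering" rule above concrete, here is a minimal, hypothetical Go sketch of hashing an object key to one of the erasure sets. The exact hash used by the sets layer is not shown in this commit, so CRC32 is only an assumed stand-in, and hashKeyToSet/setCount are illustrative names.

package main

import (
    "fmt"
    "hash/crc32"
)

// hashKeyToSet picks the erasure set that owns an object key.
// Illustrative only: any stable hash works, as long as every node maps
// the same key to the same set index ("consistent ordering").
func hashKeyToSet(key string, setCount int) int {
    return int(crc32.ChecksumIEEE([]byte(key))) % setCount
}

func main() {
    // 32 disks configured as 4 sets of 8 disks, as in the example above.
    const setCount = 4
    for _, key := range []string{"photos/a.jpg", "photos/b.jpg", "backups/db.tgz"} {
        fmt.Printf("%-14s -> set %d\n", key, hashKeyToSet(key, setCount))
    }
}

Because the key-to-set mapping is computed statically from the configured set count, adding or removing sets would remap keys, which is why the commit disallows elastic expansion or removal.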
Committed by kannappanr · parent dd80256151 · commit fb96779a8a
@@ -34,8 +34,8 @@ import (
const (
    // Admin service names
    signalServiceRPC = "Admin.SignalService"
    reInitFormatRPC = "Admin.ReInitFormat"
    listLocksRPC = "Admin.ListLocks"
    reInitDisksRPC = "Admin.ReInitDisks"
    serverInfoDataRPC = "Admin.ServerInfoData"
    getConfigRPC = "Admin.GetConfig"
    writeTmpConfigRPC = "Admin.WriteTmpConfig"
@@ -56,8 +56,8 @@ type remoteAdminClient struct {
// commands like service stop and service restart.
type adminCmdRunner interface {
    SignalService(s serviceSignal) error
    ReInitFormat(dryRun bool) error
    ListLocks(bucket, prefix string, duration time.Duration) ([]VolumeLockInfo, error)
    ReInitDisks() error
    ServerInfoData() (ServerInfoData, error)
    GetConfig() ([]byte, error)
    WriteTmpConfig(tmpFileName string, configBytes []byte) error
@@ -77,6 +77,16 @@ func (lc localAdminClient) SignalService(s serviceSignal) error {
    return nil
}

// ReInitFormat - re-initialize disk format.
func (lc localAdminClient) ReInitFormat(dryRun bool) error {
    objectAPI := newObjectLayerFn()
    if objectAPI == nil {
        return errServerNotInitialized
    }
    _, err := objectAPI.HealFormat(dryRun)
    return err
}

// ListLocks - Fetches lock information from local lock instrumentation.
func (lc localAdminClient) ListLocks(bucket, prefix string, duration time.Duration) ([]VolumeLockInfo, error) {
    return listLocksInfo(bucket, prefix, duration), nil
@@ -92,7 +102,14 @@ func (rc remoteAdminClient) SignalService(s serviceSignal) (err error) {
        err = errUnsupportedSignal
    }
    return err
}

// ReInitFormat - re-initialize disk format, remotely.
func (rc remoteAdminClient) ReInitFormat(dryRun bool) error {
    reply := AuthRPCReply{}
    return rc.Call(reInitFormatRPC, &ReInitFormatArgs{
        DryRun: dryRun,
    }, &reply)
}

// ListLocks - Sends list locks command to remote server via RPC.
@@ -109,20 +126,6 @@ func (rc remoteAdminClient) ListLocks(bucket, prefix string, duration time.Durat
    return reply.VolLocks, nil
}

// ReInitDisks - There is nothing to do here, heal format REST API
// handler has already formatted and reinitialized the local disks.
func (lc localAdminClient) ReInitDisks() error {
    return nil
}

// ReInitDisks - Signals peers via RPC to reinitialize their disks and
// object layer.
func (rc remoteAdminClient) ReInitDisks() error {
    args := AuthRPCArgs{}
    reply := AuthRPCReply{}
    return rc.Call(reInitDisksRPC, &args, &reply)
}

// ServerInfoData - Returns the server info of this server.
func (lc localAdminClient) ServerInfoData() (sid ServerInfoData, e error) {
    if globalBootTime.IsZero() {
@@ -240,6 +243,7 @@ func (rc remoteAdminClient) CommitConfig(tmpFileName string) error {
type adminPeer struct {
    addr string
    cmdRunner adminCmdRunner
    isLocal bool
}

// type alias for a collection of adminPeer.
@@ -254,6 +258,7 @@ func makeAdminPeers(endpoints EndpointList) (adminPeerList adminPeers) {
    adminPeerList = append(adminPeerList, adminPeer{
        thisPeer,
        localAdminClient{},
        true,
    })

    hostSet := set.CreateStringSet(globalMinioAddr)
@@ -280,6 +285,26 @@ func makeAdminPeers(endpoints EndpointList) (adminPeerList adminPeers) {
    return adminPeerList
}

// peersReInitFormat - reinitialize remote object layers to new format.
func peersReInitFormat(peers adminPeers, dryRun bool) error {
    errs := make([]error, len(peers))

    // Send ReInitFormat RPC call to all nodes.
    // for local adminPeer this is a no-op.
    wg := sync.WaitGroup{}
    for i, peer := range peers {
        wg.Add(1)
        go func(idx int, peer adminPeer) {
            defer wg.Done()
            if !peer.isLocal {
                errs[idx] = peer.cmdRunner.ReInitFormat(dryRun)
            }
        }(i, peer)
    }
    wg.Wait()
    return nil
}

// Initialize global adminPeer collection.
func initGlobalAdminPeers(endpoints EndpointList) {
    globalAdminPeers = makeAdminPeers(endpoints)
@@ -363,24 +388,6 @@ func listPeerLocksInfo(peers adminPeers, bucket, prefix string, duration time.Du
    return groupedLockInfos, nil
}

// reInitPeerDisks - reinitialize disks and object layer on peer servers to use the new format.
func reInitPeerDisks(peers adminPeers) error {
    errs := make([]error, len(peers))

    // Send ReInitDisks RPC call to all nodes.
    // for local adminPeer this is a no-op.
    wg := sync.WaitGroup{}
    for i, peer := range peers {
        wg.Add(1)
        go func(idx int, peer adminPeer) {
            defer wg.Done()
            errs[idx] = peer.cmdRunner.ReInitDisks()
        }(i, peer)
    }
    wg.Wait()
    return nil
}

// uptimeSlice - used to sort uptimes in chronological order.
type uptimeSlice []struct {
    err error