fix: replaced drive properly by healing the entire drive (#10799)

Bonus fix: we no longer need ReloadFormat at all; since the replaced
drive is healed locally, we only need to ensure that the drive heal
reloads the drive properly.

We preserve the drive UUIDs in their original order, which means
that a replacement recorded in `format.json` no longer requires
the drive to be reloaded into memory.

fixes #10791
Harshavardhana 2020-10-31 01:34:48 -07:00 committed by GitHub
parent 5e5cdc581d
commit b686bb9c83
10 changed files with 20 additions and 227 deletions
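
Before the per-file diffs, a minimal, self-contained Go sketch of the idea behind the change. The `format`, `disk`, and `findSlot` names here are invented stand-ins for `formatErasureV3`, `StorageAPI`, and `findDiskIndexByDiskID`; this is not the MinIO code itself. Because a healed drive keeps its UUID, the UUID alone determines its (set, disk) slot in the reference format, so the healed drive can be swapped into the in-memory pool in place, with no peer-wide ReloadFormat round trip:

package main

import "fmt"

// Invented stand-ins for the real types (formatErasureV3 / StorageAPI).
type format struct{ uuid string }

type disk struct{ endpoint, uuid string }

// findSlot mirrors what findDiskIndexByDiskID does: the drive's UUID
// alone determines its (set, disk) position in the reference format.
func findSlot(ref [][]format, uuid string) (int, int, error) {
	for m, set := range ref {
		for n, f := range set {
			if f.uuid == uuid {
				return m, n, nil
			}
		}
	}
	return -1, -1, fmt.Errorf("uuid %s not in reference format", uuid)
}

func main() {
	// Reference format in quorum: drive UUIDs in their original order.
	ref := [][]format{
		{{"uuid-a"}, {"uuid-b"}},
		{{"uuid-c"}, {"uuid-d"}},
	}
	// In-memory disk pool, one entry per format slot.
	pool := [][]disk{
		{{"ep1", "uuid-a"}, {"ep2", "uuid-b"}},
		{{"ep3", "uuid-c"}, {"ep4", "uuid-d"}},
	}

	// A replaced drive is healed locally and keeps the old UUID,
	// so it lands back in the same slot; peers need not reload.
	healed := disk{endpoint: "ep2-replacement", uuid: "uuid-b"}
	if m, n, err := findSlot(ref, healed.uuid); err == nil {
		pool[m][n] = healed // the in-place swap HealFormat now performs
	}
	fmt.Println(pool[0][1].endpoint) // prints: ep2-replacement
}

The HealFormat hunks below do the real version of this swap via findDiskIndexByDiskID(refFormat, format.Erasure.This).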


@@ -1264,17 +1264,6 @@ func (z *erasureServerSets) ListBuckets(ctx context.Context) (buckets []BucketIn
 	return buckets, nil
 }
 
-func (z *erasureServerSets) ReloadFormat(ctx context.Context, dryRun bool) error {
-	// No locks needed since reload happens in HealFormat under
-	// write lock across all nodes.
-	for _, zone := range z.serverSets {
-		if err := zone.ReloadFormat(ctx, dryRun); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
 func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	// Acquire lock on format.json
 	formatLock := z.NewNSLock(ctx, minioMetaBucket, formatConfigFile)
@@ -1306,15 +1295,6 @@ func (z *erasureServerSets) HealFormat(ctx context.Context, dryRun bool) (madmin
 		r.After.Drives = append(r.After.Drives, result.After.Drives...)
 	}
 
-	// Healing succeeded notify the peers to reload format and re-initialize disks.
-	// We will not notify peers if healing is not required.
-	for _, nerr := range globalNotificationSys.ReloadFormat(dryRun) {
-		if nerr.Err != nil {
-			logger.GetReqInfo(ctx).SetTags("peerAddress", nerr.Host.String())
-			logger.LogIf(ctx, nerr.Err)
-		}
-	}
-
 	// No heal returned by all serverSets, return errNoHealRequired
 	if countNoHeal == len(z.serverSets) {
 		return r, errNoHealRequired


@@ -1141,81 +1141,6 @@ func formatsToDrivesInfo(endpoints Endpoints, formats []*formatErasureV3, sErrs
 	return beforeDrives
 }
 
-// Reloads the format from the disk, usually called by a remote peer notifier while
-// healing in a distributed setup.
-func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
-	storageDisks, errs := initStorageDisksWithErrorsWithoutHealthCheck(s.endpoints)
-	for i, err := range errs {
-		if err != nil && err != errDiskNotFound {
-			return fmt.Errorf("Disk %s: %w", s.endpoints[i], err)
-		}
-	}
-	defer func(storageDisks []StorageAPI) {
-		if err != nil {
-			closeStorageDisks(storageDisks)
-		}
-	}(storageDisks)
-
-	formats, _ := loadFormatErasureAll(storageDisks, false)
-	if err = checkFormatErasureValues(formats, s.setDriveCount); err != nil {
-		return err
-	}
-
-	refFormat, err := getFormatErasureInQuorum(formats)
-	if err != nil {
-		return err
-	}
-
-	s.monitorContextCancel() // turn-off disk monitoring and replace format.
-
-	s.erasureDisksMu.Lock()
-
-	// Replace with new reference format.
-	s.format = refFormat
-
-	// Close all existing disks and reconnect all the disks.
-	for _, disk := range storageDisks {
-		if disk == nil {
-			continue
-		}
-
-		diskID, err := disk.GetDiskID()
-		if err != nil {
-			continue
-		}
-
-		m, n, err := findDiskIndexByDiskID(refFormat, diskID)
-		if err != nil {
-			continue
-		}
-
-		if s.erasureDisks[m][n] != nil {
-			s.erasureDisks[m][n].Close()
-		}
-
-		s.endpointStrings[m*s.setDriveCount+n] = disk.String()
-		if !disk.IsLocal() {
-			// Enable healthcheck disk for remote endpoint.
-			disk, err = newStorageAPI(disk.Endpoint())
-			if err != nil {
-				continue
-			}
-			disk.SetDiskID(diskID)
-		}
-		s.erasureDisks[m][n] = disk
-	}
-	s.erasureDisksMu.Unlock()
-
-	mctx, mctxCancel := context.WithCancel(GlobalContext)
-	s.monitorContextCancel = mctxCancel
-	go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
-
-	return nil
-}
-
 // If it is a single node Erasure and all disks are root disks, it is most likely a test setup, else it is a production setup.
 // On a test setup we allow creation of format.json on root disks to help with dev/testing.
 func isTestSetup(infos []DiskInfo, errs []error) bool {
@@ -1335,13 +1260,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 		}
 	}
 
-	// Save formats `format.json` across all disks.
-	if err = saveFormatErasureAllWithErrs(ctx, storageDisks, sErrs, tmpNewFormats); err != nil {
-		return madmin.HealResultItem{}, err
-	}
-
-	refFormat, err = getFormatErasureInQuorum(tmpNewFormats)
-	if err != nil {
+	// Save new formats `format.json` on unformatted disks.
+	if err = saveUnformattedFormat(ctx, storageDisks, tmpNewFormats); err != nil {
 		return madmin.HealResultItem{}, err
 	}
@@ -1349,21 +1269,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 	s.erasureDisksMu.Lock()
 
-	// Replace with new reference format.
-	s.format = refFormat
-
-	// Disconnect/relinquish all existing disks, lockers and reconnect the disks, lockers.
-	for _, disk := range storageDisks {
-		if disk == nil {
+	for index, format := range tmpNewFormats {
+		if format == nil {
 			continue
 		}
 
-		diskID, err := disk.GetDiskID()
-		if err != nil {
-			continue
-		}
-
-		m, n, err := findDiskIndexByDiskID(refFormat, diskID)
+		m, n, err := findDiskIndexByDiskID(refFormat, format.Erasure.This)
 		if err != nil {
 			continue
 		}
@@ -1372,18 +1283,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 			s.erasureDisks[m][n].Close()
 		}
 
-		s.endpointStrings[m*s.setDriveCount+n] = disk.String()
-		if !disk.IsLocal() {
-			// Enable healthcheck disk for remote endpoint.
-			disk, err = newStorageAPI(disk.Endpoint())
-			if err != nil {
-				continue
-			}
-			disk.SetDiskID(diskID)
-		}
-		s.erasureDisks[m][n] = disk
+		s.erasureDisks[m][n] = storageDisks[index]
+		s.endpointStrings[m*s.setDriveCount+n] = storageDisks[index].String()
 	}
 
+	// Replace with new reference format.
+	s.format = refFormat
+
 	s.erasureDisksMu.Unlock()


@@ -704,28 +704,18 @@ func initErasureMetaVolumesInLocalDisks(storageDisks []StorageAPI, formats []*fo
 	return nil
 }
 
-// saveFormatErasureAllWithErrs - populates `format.json` on disks in its order.
+// saveUnformattedFormat - populates `format.json` on unformatted disks.
 // also adds `.healing.bin` on the disks which are being actively healed.
-func saveFormatErasureAllWithErrs(ctx context.Context, storageDisks []StorageAPI, fErrs []error, formats []*formatErasureV3) error {
-	g := errgroup.WithNErrs(len(storageDisks))
-
-	// Write `format.json` to all disks.
-	for index := range storageDisks {
-		index := index
-		g.Go(func() error {
-			if formats[index] == nil {
-				return errDiskNotFound
-			}
-			if errors.Is(fErrs[index], errUnformattedDisk) {
-				return saveFormatErasure(storageDisks[index], formats[index], true)
-			}
-			return nil
-		}, index)
-	}
-
-	writeQuorum := getWriteQuorum(len(storageDisks))
-	// Wait for the routines to finish.
-	return reduceWriteQuorumErrs(ctx, g.Wait(), nil, writeQuorum)
+func saveUnformattedFormat(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error {
+	for index, format := range formats {
+		if format == nil {
+			continue
+		}
+		if err := saveFormatErasure(storageDisks[index], format, true); err != nil {
+			return err
+		}
+	}
+	return nil
 }
 
 // saveFormatErasureAll - populates `format.json` on disks in its order.


@@ -1536,12 +1536,6 @@ func (fs *FSObjects) DeleteObjectTags(ctx context.Context, bucket, object string
 	return fs.PutObjectTags(ctx, bucket, object, "", opts)
 }
 
-// ReloadFormat - no-op for fs, Valid only for Erasure.
-func (fs *FSObjects) ReloadFormat(ctx context.Context, dryRun bool) error {
-	logger.LogIf(ctx, NotImplemented{})
-	return NotImplemented{}
-}
-
 // HealFormat - no-op for fs, Valid only for Erasure.
 func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	logger.LogIf(ctx, NotImplemented{})


@@ -160,11 +160,6 @@ func (a GatewayUnsupported) DeleteBucketSSEConfig(ctx context.Context, bucket st
 	return NotImplemented{}
 }
 
-// ReloadFormat - Not implemented stub.
-func (a GatewayUnsupported) ReloadFormat(ctx context.Context, dryRun bool) error {
-	return NotImplemented{}
-}
-
 // HealFormat - Not implemented stub
 func (a GatewayUnsupported) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
 	return madmin.HealResultItem{}, NotImplemented{}


@@ -140,21 +140,6 @@ func (g *NotificationGroup) Go(ctx context.Context, f func() error, index int, a
 	}()
 }
 
-// ReloadFormat - calls ReloadFormat REST call on all peers.
-func (sys *NotificationSys) ReloadFormat(dryRun bool) []NotificationPeerErr {
-	ng := WithNPeers(len(sys.peerClients))
-	for idx, client := range sys.peerClients {
-		if client == nil {
-			continue
-		}
-		client := client
-		ng.Go(GlobalContext, func() error {
-			return client.ReloadFormat(dryRun)
-		}, idx, *client.host)
-	}
-	return ng.Wait()
-}
-
 // DeletePolicy - deletes policy across all peers.
 func (sys *NotificationSys) DeletePolicy(policyName string) []NotificationPeerErr {
 	ng := WithNPeers(len(sys.peerClients))


@@ -114,7 +114,6 @@ type ObjectLayer interface {
 	CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
 
 	// Healing operations.
-	ReloadFormat(ctx context.Context, dryRun bool) error
 	HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
 	HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
 	HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (madmin.HealResultItem, error)


@@ -456,19 +456,6 @@ func (client *peerRESTClient) DeleteBucketMetadata(bucket string) error {
 	return nil
 }
 
-// ReloadFormat - reload format on the peer node.
-func (client *peerRESTClient) ReloadFormat(dryRun bool) error {
-	values := make(url.Values)
-	values.Set(peerRESTDryRun, strconv.FormatBool(dryRun))
-
-	respBody, err := client.call(peerRESTMethodReloadFormat, values, nil, -1)
-	if err != nil {
-		return err
-	}
-	defer http.DrainBody(respBody)
-	return nil
-}
-
 // cycleServerBloomFilter will cycle the bloom filter to start recording to index y if not already.
 // The response will contain a bloom filter starting at index x up to, but not including index y.
 // If y is 0, the response will not update y, but return the currently recorded information


@@ -50,7 +50,6 @@
 	peerRESTMethodLoadGroup             = "/loadgroup"
 	peerRESTMethodStartProfiling        = "/startprofiling"
 	peerRESTMethodDownloadProfilingData = "/downloadprofilingdata"
-	peerRESTMethodReloadFormat          = "/reloadformat"
 	peerRESTMethodCycleBloom            = "/cyclebloom"
 	peerRESTMethodTrace                 = "/trace"
 	peerRESTMethodListen                = "/listen"
@@ -72,7 +71,6 @@
 	peerRESTIsGroup  = "is-group"
 	peerRESTSignal   = "signal"
 	peerRESTProfiler = "profiler"
-	peerRESTDryRun   = "dry-run"
 	peerRESTTraceAll = "all"
 	peerRESTTraceErr = "err"


@@ -578,45 +578,6 @@ func (s *peerRESTServer) LoadBucketMetadataHandler(w http.ResponseWriter, r *htt
 	}
 }
 
-// ReloadFormatHandler - Reload Format.
-func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Request) {
-	if !s.IsValid(w, r) {
-		s.writeErrorResponse(w, errors.New("Invalid request"))
-		return
-	}
-	vars := mux.Vars(r)
-	dryRunString := vars[peerRESTDryRun]
-	if dryRunString == "" {
-		s.writeErrorResponse(w, errors.New("dry-run parameter is missing"))
-		return
-	}
-
-	var dryRun bool
-	switch strings.ToLower(dryRunString) {
-	case "true":
-		dryRun = true
-	case "false":
-		dryRun = false
-	default:
-		s.writeErrorResponse(w, errInvalidArgument)
-		return
-	}
-
-	objAPI := newObjectLayerFn()
-	if objAPI == nil {
-		s.writeErrorResponse(w, errServerNotInitialized)
-		return
-	}
-
-	err := objAPI.ReloadFormat(GlobalContext, dryRun)
-	if err != nil {
-		s.writeErrorResponse(w, err)
-		return
-	}
-
-	w.(http.Flusher).Flush()
-}
-
 // CycleServerBloomFilterHandler cycles bloom filter on server.
 func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) {
 	if !s.IsValid(w, r) {
@@ -1093,7 +1054,6 @@ func registerPeerRESTHandlers(router *mux.Router) {
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodStartProfiling).HandlerFunc(httpTraceAll(server.StartProfilingHandler)).Queries(restQueries(peerRESTProfiler)...)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodDownloadProfilingData).HandlerFunc(httpTraceHdrs(server.DownloadProfilingDataHandler))
-	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodReloadFormat).HandlerFunc(httpTraceHdrs(server.ReloadFormatHandler)).Queries(restQueries(peerRESTDryRun)...)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodTrace).HandlerFunc(server.TraceHandler)
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodListen).HandlerFunc(httpTraceHdrs(server.ListenHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodBackgroundHealStatus).HandlerFunc(server.BackgroundHealStatusHandler)