crawler: Missing folder heal check per set (#10876)
@@ -627,29 +627,6 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
		disks = disks[:askDisks]
	}

	var readers = make([]*metacacheReader, askDisks)
	for i := range disks {
		r, w := io.Pipe()
		d := disks[i]
		readers[i], err = newMetacacheReader(r)
		if err != nil {
			cancel()
			return entries, err
		}
		// Send request to each disk.
		go func() {
			err := d.WalkDir(ctx, WalkDirOptions{
				Bucket:       o.Bucket,
				BaseDir:      o.BaseDir,
				Recursive:    o.Recursive || o.Separator != SlashSeparator,
				FilterPrefix: o.FilterPrefix}, w)
			w.CloseWithError(err)
			if err != io.EOF {
				logger.LogIf(ctx, err)
			}
		}()
	}

	// Create output for our results.
	cacheCh := make(chan metaCacheEntry, metacacheBlockSize)
@@ -746,92 +723,38 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
		bucket: o.Bucket,
	}

	topEntries := make(metaCacheEntries, len(readers))
	for {
		// Get the top entry from each
		var current metaCacheEntry
		var atEOF, agree int
		for i, r := range readers {
			topEntries[i].name = ""
			entry, err := r.peek()
			switch err {
			case io.EOF:
				atEOF++
				continue
			case nil:
			default:
				closeChannels()
				metaMu.Lock()
				meta.status = scanStateError
				meta.error = err.Error()
				metaMu.Unlock()
				return
	err := listPathRaw(ctx, listPathRawOptions{
		disks:        disks,
		bucket:       o.Bucket,
		path:         o.BaseDir,
		recursive:    o.Recursive,
		filterPrefix: o.FilterPrefix,
		minDisks:     askDisks - 1,
		agreed: func(entry metaCacheEntry) {
			cacheCh <- entry
			filterCh <- entry
		},
		partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
			// Results Disagree :-(
			entry, ok := entries.resolve(&resolver)
			if ok {
				cacheCh <- *entry
				filterCh <- *entry
			}
			// If no current, add it.
			if current.name == "" {
				topEntries[i] = entry
				current = entry
				agree++
				continue
			}
			// If exact match, we agree.
			if current.matches(&entry, o.Bucket) {
				topEntries[i] = entry
				agree++
				continue
			}
			// If only the name matches we didn't agree, but add it for resolution.
			if entry.name == current.name {
				topEntries[i] = entry
				continue
			}
			// We got different entries
			if entry.name > current.name {
				continue
			}
			// We got a new, better current.
			// Clear existing entries.
			for i := range topEntries[:i] {
				topEntries[i] = metaCacheEntry{}
			}
			agree = 1
			current = entry
			topEntries[i] = entry
		}
		// Break if all at EOF.
		if atEOF == len(readers) {
			break
		}
		if agree == len(readers) {
			// Everybody agreed
			for _, r := range readers {
				r.skip(1)
			}
			cacheCh <- topEntries[0]
			filterCh <- topEntries[0]
			continue
		}
		},
	})

		// Results Disagree :-(
		entry, ok := topEntries.resolve(&resolver)
		if ok {
			cacheCh <- *entry
			filterCh <- *entry
		}
		// Skip the inputs we used.
		for i, r := range readers {
			if topEntries[i].name != "" {
				r.skip(1)
			}
		}
	}

	// Save success
	metaMu.Lock()
	if err != nil {
		meta.status = scanStateError
		meta.error = err.Error()
	}
	// Save success
	if meta.error == "" {
		meta.status = scanStateSuccess
		meta.endedCycle = intDataUpdateTracker.current()
	}

	meta, _ = o.updateMetacacheListing(meta, rpc)
	metaMu.Unlock()
@@ -847,3 +770,184 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr

	return filteredResults()
}

type listPathRawOptions struct {
	disks        []StorageAPI
	bucket, path string
	recursive    bool
	filterPrefix string
	// Minimum number of good disks to continue.
	// An error will be returned if fewer than minDisks drives respond without error.
	minDisks       int
	reportNotFound bool

	// Callbacks with results:
	// If set to nil, the callback will not be called.

	// agreed is called if all disks agreed.
	agreed func(entry metaCacheEntry)

	// partial will be called when there is disagreement between disks.
	// If a disk did not return any result, but also hasn't errored,
	// the entry will be empty and errs will hold a nil error for that disk.
	partial func(entries metaCacheEntries, nAgreed int, errs []error)

	// finished will be called when all streams have finished and
	// one or more disks returned an error.
	// Will not be called if everything operates as expected.
	finished func(errs []error)
}

// listPathRaw will list a path on the provided drives.
// See listPathRawOptions on how results are delivered.
// Directories are always returned.
// Cache will be bypassed.
// Context cancellation will be respected but may take a while to take effect.
func listPathRaw(ctx context.Context, opts listPathRawOptions) (err error) {
	disks := opts.disks
	if len(disks) == 0 {
		return fmt.Errorf("listPathRaw: 0 drives provided")
	}

	// Disconnect from call above, but cancel on exit.
	ctx, cancel := context.WithCancel(GlobalContext)
	defer cancel()

	askDisks := len(disks)
	var readers = make([]*metacacheReader, askDisks)
	for i := range disks {
		r, w := io.Pipe()
		d := disks[i]
		readers[i], err = newMetacacheReader(r)
		if err != nil {
			return err
		}
		// Send request to each disk.
		go func() {
			err := d.WalkDir(ctx, WalkDirOptions{
				Bucket:         opts.bucket,
				BaseDir:        opts.path,
				Recursive:      opts.recursive,
				ReportNotFound: opts.reportNotFound,
				FilterPrefix:   opts.filterPrefix}, w)
			w.CloseWithError(err)
			if err != io.EOF {
				logger.LogIf(ctx, err)
			}
		}()
	}

	topEntries := make(metaCacheEntries, len(readers))
	errs := make([]error, len(readers))
	for {
		// Get the top entry from each
		var current metaCacheEntry
		var atEOF, fnf, hasErr, agree int
		for i := range topEntries {
			topEntries[i] = metaCacheEntry{}
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		for i, r := range readers {
			if errs[i] != nil {
				hasErr++
				continue
			}
			entry, err := r.peek()
			switch err {
			case io.EOF:
				atEOF++
				continue
			case nil:
			default:
				if err.Error() == errFileNotFound.Error() {
					atEOF++
					fnf++
					continue
				}

				hasErr++
				errs[i] = err
				continue
			}
			// If no current, add it.
			if current.name == "" {
				topEntries[i] = entry
				current = entry
				agree++
				continue
			}
			// If exact match, we agree.
			if current.matches(&entry, opts.bucket) {
				topEntries[i] = entry
				agree++
				continue
			}
			// If only the name matches we didn't agree, but add it for resolution.
			if entry.name == current.name {
				topEntries[i] = entry
				continue
			}
			// We got different entries
			if entry.name > current.name {
				continue
			}
			// We got a new, better current.
			// Clear existing entries.
			for i := range topEntries[:i] {
				topEntries[i] = metaCacheEntry{}
			}
			agree = 1
			current = entry
			topEntries[i] = entry
		}

		// Stop if we exceed the allowed number of bad disks.
		if hasErr > len(disks)-opts.minDisks && hasErr > 0 {
			if opts.finished != nil {
				opts.finished(errs)
			}
			var combinedErr []string
			for i, err := range errs {
				if err != nil {
					combinedErr = append(combinedErr, fmt.Sprintf("disk %d returned: %s", i, err))
				}
			}
			return errors.New(strings.Join(combinedErr, ", "))
		}

		// Break if all at EOF or error.
		if atEOF+hasErr == len(readers) {
			if hasErr > 0 && opts.finished != nil {
				opts.finished(errs)
			}
			break
		}
		if fnf == len(readers) {
			return errFileNotFound
		}
		if agree == len(readers) {
			// Everybody agreed
			for _, r := range readers {
				r.skip(1)
			}
			if opts.agreed != nil {
				opts.agreed(current)
			}
			continue
		}
		if opts.partial != nil {
			opts.partial(topEntries, agree, errs)
		}
		// Skip the inputs we used.
		for i, r := range readers {
			if topEntries[i].name != "" {
				r.skip(1)
			}
		}
	}
	return nil
}
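
For orientation, here is a minimal usage sketch of the listPathRaw API introduced above. It is not part of the diff: the function name exampleRawList, the path/prefix values, and the printed output are illustrative only, and the sketch assumes it sits inside MinIO's cmd package so the unexported identifiers (listPathRaw, listPathRawOptions, StorageAPI, metaCacheEntry, metaCacheEntries) resolve.

	// Illustrative sketch only: assumes placement in MinIO's cmd package so the
	// unexported listPathRaw, listPathRawOptions, StorageAPI, metaCacheEntry and
	// metaCacheEntries identifiers resolve. Not part of the commit above.
	package cmd

	import (
		"context"
		"fmt"
	)

	// exampleRawList (hypothetical) prints every entry listed from the given drives.
	func exampleRawList(ctx context.Context, disks []StorageAPI, bucket, prefix string) error {
		return listPathRaw(ctx, listPathRawOptions{
			disks:        disks,
			bucket:       bucket,
			path:         "", // list from the bucket root
			recursive:    true,
			filterPrefix: prefix,
			minDisks:     len(disks) - 1, // tolerate a single failed drive
			agreed: func(entry metaCacheEntry) {
				// Every drive returned an identical entry.
				fmt.Println("agreed:", entry.name)
			},
			partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
				// Drives disagree: entries holds one candidate per drive
				// (an empty name means no candidate); errs holds any per-drive error.
				for _, e := range entries {
					if e.name != "" {
						fmt.Printf("partial (%d agreed): %s\n", nAgreed, e.name)
						break
					}
				}
			},
			finished: func(errs []error) {
				// Called when one or more drives returned an error.
				fmt.Println("finished with errors:", errs)
			},
		})
	}

Note how minDisks interacts with failures in the code above: with four drives and minDisks set to 3, one failed drive still yields a listing, while a second failure makes listPathRaw return the combined per-drive error.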