Improve ListObjects performance by listing in parallel (#7270)

The side effect of this change is a memory
increase, but this is a trade-off between
performance and actual memory usage.

For all practical scenarios this should be
an adequate change.
This commit is contained in:
Harshavardhana 2019-02-27 14:39:22 -08:00 committed by kannappanr
parent b983da957d
commit ce588d1489
2 changed files with 21 additions and 28 deletions

View File

@ -1693,7 +1693,7 @@ func (s *TestSuiteCommon) TestListObjectsHandler(c *check) {
{getListObjectsV2URL(s.endPoint, bucketName, "", "1000", "", "url"), []string{"<Key>foo+bar+1</Key>", "<Key>foo+bar+2</Key>"}},
}
for i, testCase := range testCases {
for _, testCase := range testCases {
// create listObjectsV1 request with valid parameters
request, err = newTestSignedRequest("GET", testCase.getURL, 0, nil, s.accessKey, s.secretKey, s.signer)
c.Assert(err, nil)
@ -1706,7 +1706,6 @@ func (s *TestSuiteCommon) TestListObjectsHandler(c *check) {
getContent, err := ioutil.ReadAll(response.Body)
c.Assert(err, nil)
fmt.Printf("Test %d: %+v vs %+v\n", i+1, string(getContent), testCase.expectedStrings)
for _, expectedStr := range testCase.expectedStrings {
c.Assert(strings.Contains(string(getContent), expectedStr), true)
}

View File

@ -641,22 +641,27 @@ func (s *xlSets) CopyObject(ctx context.Context, srcBucket, srcObject, destBucke
// Returns function "listDir" of the type listDirFunc.
// isLeaf - is used by listDir function to check if an entry is a leaf or non-leaf entry.
// disks - used for doing disk.ListDir(). Sets passes set of disks.
func listDirSetsFactory(ctx context.Context, isLeaf isLeafFunc, isLeafDir isLeafDirFunc, sets ...[]StorageAPI) listDirFunc {
func listDirSetsFactory(ctx context.Context, isLeaf isLeafFunc, isLeafDir isLeafDirFunc, sets ...*xlObjects) listDirFunc {
listDirInternal := func(bucket, prefixDir, prefixEntry string, disks []StorageAPI) (mergedEntries []string) {
for _, disk := range disks {
var diskEntries = make([][]string, len(disks))
var wg sync.WaitGroup
for index, disk := range disks {
if disk == nil {
continue
}
wg.Add(1)
go func(index int, disk StorageAPI) {
defer wg.Done()
diskEntries[index], _ = disk.ListDir(bucket, prefixDir, -1)
}(index, disk)
}
var entries []string
wg.Wait()
// Find elements in entries which are not in mergedEntries
for _, entries := range diskEntries {
var newEntries []string
var err error
entries, err = disk.ListDir(bucket, prefixDir, -1)
if err != nil {
continue
}
// Find elements in entries which are not in mergedEntries
for _, entry := range entries {
idx := sort.SearchStrings(mergedEntries, entry)
// if entry is already present in mergedEntries don't add.
@ -672,16 +677,16 @@ func listDirSetsFactory(ctx context.Context, isLeaf isLeafFunc, isLeafDir isLeaf
sort.Strings(mergedEntries)
}
}
return mergedEntries
}
// listDir - lists all the entries at a given prefix and given entry in the prefix.
listDir := func(bucket, prefixDir, prefixEntry string) (mergedEntries []string, delayIsLeaf bool) {
for _, disks := range sets {
entries := listDirInternal(bucket, prefixDir, prefixEntry, disks)
for _, set := range sets {
var newEntries []string
// Find elements in entries which are not in mergedEntries
for _, entry := range entries {
for _, entry := range listDirInternal(bucket, prefixDir, prefixEntry, set.getLoadBalancedDisks()) {
idx := sort.SearchStrings(mergedEntries, entry)
// if entry is already present in mergedEntries don't add.
if idx < len(mergedEntries) && mergedEntries[idx] == entry {
@ -696,8 +701,7 @@ func listDirSetsFactory(ctx context.Context, isLeaf isLeafFunc, isLeafDir isLeaf
sort.Strings(mergedEntries)
}
}
mergedEntries, delayIsLeaf = filterListEntries(bucket, prefixDir, mergedEntries, prefixEntry, isLeaf)
return mergedEntries, delayIsLeaf
return filterListEntries(bucket, prefixDir, mergedEntries, prefixEntry, isLeaf)
}
return listDir
}
@ -743,12 +747,7 @@ func (s *xlSets) ListObjects(ctx context.Context, bucket, prefix, marker, delimi
return false
}
var setDisks = make([][]StorageAPI, len(s.sets))
for _, set := range s.sets {
setDisks = append(setDisks, set.getLoadBalancedDisks())
}
listDir := listDirSetsFactory(ctx, isLeaf, isLeafDir, setDisks...)
listDir := listDirSetsFactory(ctx, isLeaf, isLeafDir, s.sets...)
walkResultCh = startTreeWalk(ctx, bucket, prefix, marker, recursive, listDir, isLeaf, isLeafDir, endWalkCh)
}
@ -1353,12 +1352,7 @@ func (s *xlSets) listObjectsHeal(ctx context.Context, bucket, prefix, marker, de
return false
}
var setDisks = make([][]StorageAPI, len(s.sets))
for _, set := range s.sets {
setDisks = append(setDisks, set.getLoadBalancedDisks())
}
listDir := listDirSetsFactory(ctx, isLeaf, isLeafDir, setDisks...)
listDir := listDirSetsFactory(ctx, isLeaf, isLeafDir, s.sets...)
walkResultCh = startTreeWalk(ctx, bucket, prefix, marker, recursive, listDir, isLeaf, isLeafDir, endWalkCh)
}